#--------------------------------------------------#
#   Academic freedom and the onset of autocratization   #
#--------------------------------------------------#

# Title: "Academic freedom and the onset of autocratization" #
# Authors: "Pelke, Lars", FAU Erlangen-Nürnberg
# date: 2023-04-24
# journal: Democratization
# DOI: 10.1080/13510347.2023.2207213
# written under "R version 4.2.3 (2023-03-15)"

#### Preliminaries ####

R.version$version.string

# clear workspace
rm(list=ls())

# load packages

library(tidyverse)
library(performance)
library(estimatr)
library(ggeffects)
library(ggokabeito)
library(zoo)
library(marginaleffects)
library(ggpubr)
library(ggthemes)
library(modelsummary)
library(splines)
library(brglm)
library(splines)
library(lmtest)
library(sandwich)
library(ERT)
library(Amelia)

#---------------------------------------------------#
#--------------------load data ---------------------#
#---------------------------------------------------#

# set you working directory here #

setwd("P:/PIPM/7. VWS Projekt Academic Freedom Index/replication_files_autocratization_and_academic_freedom")

# load country_aggregated data

vdem_full_v11 <- readRDS("data/vdem11/Country_Year_V-Dem_Full+others_R_v11/V-Dem-CY-Full+Others-v11.rds")
vdem_full_v12 <- readRDS("data/vdem12/Country_Year_V-Dem_Full+others_R_v12/V-Dem-CY-Full+Others-v12.rds")
academic_freedom_four_data <-  readRDS("data/academic_freedom_index_cy.rds")

#---------------------------------------------------#
#--------------------subset data--------------------#
#---------------------------------------------------#

vdem_subset_v11 <- vdem_full_v11 %>%
  dplyr::select(country_id, year, COWcode, country_name, country_text_id, v2x_polyarchy, v2x_libdem,
                v2caviol, v2caassemb, v2cagenmob, v2caconmob, v2cademmob, v2caautmob, v2cauni, 
                v2canuni, v2caprotac, v2cafres, v2cafexch, v2cainsaut, v2casurv, v2clacfree, e_regionpol_6C,
                v2xca_academ, e_pt_coup, e_regionpol, v2x_regime, v2xlg_legcon, v2x_jucon)

vdem_full_v12 <- vdem_full_v12 %>%
  dplyr::select(country_id, year, e_gdppc, e_pop)

vdem_subset_v11 <- vdem_subset_v11 %>%
  left_join(vdem_full_v12, by = c("country_id", "year"))

vdem_subset_v11 <- vdem_subset_v11 %>%
  left_join(academic_freedom_four_data, by = c("country_text_id", "year")) %>%
  mutate(diff_v2xca_academ = v2xca_academ_four - v2xca_academ )

summary(vdem_subset_v11$diff_v2xca_academ)

vdem_subset_v11 %>% 
  filter(diff_v2xca_academ >= 0.1 | diff_v2xca_academ <= -0.1) %>%
  dplyr::select(country_name, year, v2xca_academ, v2xca_academ_four,diff_v2xca_academ)

## Rename Variables ##

vdem_subset_v11 <- vdem_subset_v11 %>%
  rename(v2xca_academ_five = v2xca_academ, 
         v2xca_academ = v2xca_academ_four)

#---------------------------------------------------#
#--------------------Load ERT data------------------#
#---------------------------------------------------#
ert_data <- ERT::get_eps(cum_incl = 0.05, cum_turn = 0.05)

ert_data <- ert_data %>%
  group_by(dem_ep_id) %>%
  mutate(last_v2x_polyarchy = last(v2x_polyarchy), 
         first_v2x_polyarchy = first(v2x_polyarchy)) %>%
  ungroup() %>%
  mutate(v2x_polyarchy_increase = last_v2x_polyarchy - first_v2x_polyarchy)


ert_data <- ert_data %>%
  group_by(aut_ep_id) %>%
  mutate(last_v2x_polyarchy = last(v2x_polyarchy), 
         first_v2x_polyarchy = first(v2x_polyarchy)) %>%
  ungroup() %>%
  mutate(v2x_polyarchy_decrease = last_v2x_polyarchy - first_v2x_polyarchy) %>%
  dplyr::select(-c(last_v2x_polyarchy, first_v2x_polyarchy))

ert_data <- ert_data %>%
  mutate(v2x_polyarchy_increase = ifelse(is.na(dem_ep_id), NA,v2x_polyarchy_increase )) %>%
  mutate(v2x_polyarchy_decrease = ifelse(is.na(aut_ep_id), NA,v2x_polyarchy_decrease ))

ert_data <- ert_data %>%
  dplyr::select(-c(country_text_id, country_name, v2x_regime:v2x_polyarchy_codehigh))

vdem_subset_v11 <- vdem_subset_v11 %>%
  left_join(ert_data, by =c("country_id", "year"))


vdem_subset_v11 <- vdem_subset_v11 %>%
  group_by(e_regionpol, year) %>%
  mutate(regionpol_EDI = mean(v2x_polyarchy, na.rm = TRUE)) %>%
  ungroup() %>%
  group_by(country_id) %>%
  mutate(e_gdppc_growth = ((e_gdppc - dplyr::lag(e_gdppc))/dplyr::lag(e_gdppc))*100) %>%
  mutate(e_gdppc_growth_roll5 = rollmean(e_gdppc_growth, k = 5, fill = NA)) %>%
  ungroup() %>%
  group_by(aut_ep_id) %>%
  mutate(aut_duration =  year- first(year), 
         aut_duration = ifelse(is.na(aut_ep_id), 0, aut_duration)) %>%
  group_by(country_id) %>%
  mutate(group_id = cumsum(aut_ep != lag(aut_ep, default = FALSE))) %>% # group_id for each autocra_period
  group_by(country_id, group_id) %>%
  mutate(spell = year-first(year), 
         spell = ifelse(is.na(aut_ep_id), spell, 0))


#---------------------------------------------------#
#------------Democracy Stock Variable---------------#
#---------------------------------------------------#


decay_sum <- function(tm, vl, decay) {
  last_time <- 0
  current_sum <- 0
  sums <- numeric(length(vl))
  ldecay <- (1-decay)
  for (i in 1:length(vl)) {
    delta <- as.numeric(tm[i] - last_time)
    current_sum <- current_sum * (ldecay * delta) + vl[i]
    last_time <- tm[i]
    sums[i] <- current_sum
  }
  sums
}

annual_depricition_function  <- function(year, var, decay) {
  last_time <- first(year)
  current_sum <- 0
  sums <- numeric(length(var))
  ldecay <- (1-decay)
  for (i in 1:length(var)) {
    delta <- as.numeric(year[i] - last_time)
    current_sum <- ldecay * (current_sum + decay * var[i])
    last_time <- year[i]
    sums[i] <- current_sum
  }
  sums
}


## Functions ##

na_interpolation2 <- function(x, option = "linear", maxgap) {
  library(imputeTS)
  library(dplyr)
  
  total_not_missing <- sum(!is.na(x))
  
  # check there is sufficient data for na_interpolation 
  if(total_not_missing < 2) {x} 
  
  else
    
    # replace takes an input vector, a T/F vector & replacement value
  {replace(
    # input vector is interpolated data
    # this will impute leading/lagging NAs which we don't want 
    imputeTS::na_interpolation(x, option = option, maxgap = maxgap), 
    
    # create T/F vector for NAs,  
    is.na(na.approx(x, na.rm = FALSE)), 
    
    # replace TRUE with NA in input vector  
    NA) 
  }
}


vdem_subset_v11 <- vdem_subset_v11 %>%
  group_by(country_id) %>%
  mutate(v2x_polyarchy_interpolated = na_interpolation2(v2x_polyarchy, option = "linear", maxgap = 5)) %>%
  drop_na(v2x_polyarchy_interpolated) %>%
  mutate(democracy_stock = annual_depricition_function(year, v2x_polyarchy_interpolated, 0.01)) %>%
  dplyr::select(country_id, country_name, year, v2x_polyarchy, democracy_stock, everything())

summary(vdem_subset_v11$democracy_stock)

#---------------------------------------------------#
#-----------------Statistical Analysis--------------#
#---------------------------------------------------#

## lags for independent variables ##

vdem_subset_v11_noNA <- vdem_subset_v11 %>%
  mutate(e_gdppclog = log(e_gdppc+1), 
         e_poplog = log(e_pop +1), 
         year_1900 = year -1900) %>%
  group_by(country_id) %>%
  mutate(v2xca_academ_lag = lag(v2xca_academ), 
         v2cademmob_lag = lag(v2cademmob), 
         e_gdppclog_lag = lag(e_gdppclog), 
         e_gdppc_growth_roll5_lag = lag(e_gdppc_growth_roll5), 
         regionpol_EDI_lag = lag(regionpol_EDI), 
         e_poplog_lag = lag(e_poplog), 
         v2x_polyarchy_lag = lag(v2x_polyarchy), 
         v2x_polyarchy_lag_squared = v2x_polyarchy_lag^2, 
         v2x_jucon_lag = lag(v2x_jucon), 
         v2xlg_legcon_lag = lag(v2xlg_legcon), 
         democracy_stock_lag = lag(democracy_stock))

vdem_subset_v11_noNA <- vdem_subset_v11_noNA %>%
  mutate(region_factor = as.factor(e_regionpol_6C),
         region_factor = relevel(region_factor, ref=1), 
         spell = spell/100,
         spell_sq = spell^2/100,
         spell_cb = spell^3/100,
         year_sq = year_1900^2, 
         v2xca_academ_lag_sq = v2xca_academ_lag^2)


## drop NA ## 
vdem_subset_v11_noNA <- vdem_subset_v11_noNA %>%
  ungroup() %>%
  filter(year >=1900) %>%
  dplyr::select(-aut_ep_id) %>%
  mutate(aut_ep_stage2 = ifelse(aut_ep_outcome %in% c(1,5), 1, 0)) %>%
  drop_na(v2xca_academ_lag, e_gdppclog, e_gdppc_growth_roll5_lag, regionpol_EDI_lag, 
            e_poplog,region_factor ,v2x_jucon_lag, v2xlg_legcon_lag, year_1900, spell, country_id, democracy_stock_lag) 

summary(vdem_subset_v11_noNA)

## setting ongoing events to NA as recommend by McGrath (2015)

vdem_subset_v11_noNA <- vdem_subset_v11_noNA %>%
  group_by(country_id) %>%
  mutate(aut_ep_onset = ifelse(aut_ep==1 & lag(aut_ep)== 1, NA, aut_ep)) %>%
  dplyr::select(country_id, year, aut_ep, aut_ep_onset, everything()) %>%
  drop_na(aut_ep_onset)

summary(vdem_subset_v11_noNA$aut_ep_onset) # 1.67 % of all country-years
table(vdem_subset_v11_noNA$aut_ep_onset) # 239 autocratization onsets 

# list of all onsets #

list_of_auto_onsets <- vdem_subset_v11_noNA %>%
  filter(aut_ep_onset == 1) %>%
  ungroup() %>%
  dplyr::select(country_name, year)

list_of_auto_onsets$year <- as.factor(list_of_auto_onsets$year)

#### compare number of autocratization onset in V-Dem sample ####

vdem_subset_v11 <- vdem_subset_v11 %>%
  filter(year >=1900) %>%
  group_by(country_id) %>%
  mutate(aut_ep_onset = ifelse(aut_ep==1 & lag(aut_ep)== 1, NA, aut_ep)) %>%
  dplyr::select(country_id, year, aut_ep, aut_ep_onset, everything()) %>%
  drop_na(aut_ep_onset)

summary(vdem_subset_v11$aut_ep_onset) # 1.67 % of all country-years
table(vdem_subset_v11$aut_ep_onset) # 360 autocratization onsets 

#### Summary Statistics Table B2 Supplementary Appendix ####

vdem_descriptives <- vdem_subset_v11_noNA %>%
  dplyr::select(year, aut_ep_onset, v2xca_academ_lag, democracy_stock_lag, e_gdppclog_lag, 
                e_gdppc_growth_roll5_lag, e_poplog_lag, regionpol_EDI_lag, 
                v2x_jucon_lag, v2xlg_legcon_lag)


##### Recoding onset of an autocratization episode as 0 and stable regime as 1 ####

vdem_subset_v11_noNA$aut_ep_onset_rev <- recode(vdem_subset_v11_noNA$aut_ep_onset, `1` = 0, `0` = 1)
table(vdem_subset_v11_noNA$aut_ep_onset_rev)
table(vdem_subset_v11_noNA$aut_ep_onset)


vdem_subset_v11_noNA %>%
  filter(aut_ep_onset == 1) %>%
  distinct(country_id)

#---------------------------------------------------#
#--------------------- Main Models -----------------#
#---------------------------------------------------#

# Run model 1,
mod.1 <- brglm::brglm(as.factor(aut_ep_onset) ~  poly(v2xca_academ_lag, degree = 2) + democracy_stock_lag +
                        e_gdppclog_lag + e_gdppc_growth_roll5_lag +  e_poplog_lag +
                        regionpol_EDI_lag + v2x_jucon_lag + v2xlg_legcon_lag +
                        region_factor +
                        year_1900 + year_sq, 
                      data = vdem_subset_v11_noNA, family = binomial(link = "probit"),
                      method = "brglm.fit", control.brglm = brglm.control(br.maxit = 1000, br.trace = T),
                      pl=T)
summary(mod.1)


#Clustered SEs
ses_mod.1 <-  coeftest(mod.1, vcov = vcovCL(mod.1, cluster = vdem_subset_v11_noNA[, "country_id"]),
                       type = "HC1", fix = T, cadjust = T)
ses_mod.1

# Run model 1,
mod.2 <- brglm::brglm(as.factor(aut_ep_onset) ~  poly(v2xca_academ_lag, degree = 2) + democracy_stock_lag +
                        v2xca_academ_lag:democracy_stock_lag + e_gdppclog_lag +
                        e_gdppc_growth_roll5_lag +  e_poplog_lag +
                        regionpol_EDI_lag + v2x_jucon_lag + v2xlg_legcon_lag +
                        region_factor +
                        year_1900 + year_sq, 
                      data = vdem_subset_v11_noNA, family = binomial(link = "probit"),
                      method = "brglm.fit", control.brglm = brglm.control(br.maxit = 1000, br.trace = T),
                      pl=T)
summary(mod.2)

#Clustered SEs
ses_mod.2 <-  coeftest(mod.2, vcov = vcovCL(mod.2, cluster = vdem_subset_v11_noNA[, "country_id"]),
                       type = "HC1", fix = T, cadjust = T)
ses_mod.2

## standard errors and pvalues for 

standard.errors <- list(ses_mod.1[,2], ses_mod.2[,2])
p.values <- list(ses_mod.1[,4], ses_mod.2[,4])


modelsummary::modelsummary(list(mod.1, mod.2),
                           output = "results/TableC5.tex", 
                           fmt = 3,
                           vcov = "stata", cluster = "country_id", 
                           estimate  = c("{estimate}{stars}"),
                           statistic = 'conf.int', 
                           conf_level = .95, 
                           coef_rename = c("(Intercept)" = "Intercept", 
                                           "poly(v2xca_academ_lag, degree = 2)1" = "Academic Freedom", 
                                           "poly(v2xca_academ_lag, degree = 2)2" = "Academic Freedom ^2", 
                                           "democracy_stock_lag"  = "Democracy Stock", 
                                           "e_gdppclog_lag"  = "GDP pc log", 
                                           "e_gdppc_growth_roll5_lag" = "GDP growth", 
                                           "e_poplog_lag" = "Population log", 
                                           "regionpol_EDI_lag" = "Regional democracy levels", 
                                           "v2x_jucon_lag" =  "Judicial constraints on executive", 
                                           "v2xlg_legcon_lag" = "Legislative constraints on executive", 
                                           "region_factor5" = "Western Europe and North America", 
                                           "region_factor4" = "Subsaharan Africa", 
                                           "region_factor6" = "Asia and Pacific", 
                                           "region_factor1" = "Eastern Europe and Central Asia", 
                                           "region_factor3" = "MENA", 
                                           "year_1900" = "Year", 
                                           "year_sq" = "Year squared", 
                                           "democracy_stock_lag:v2xca_academ_lag" = "AFI * Democracy Stock"))


# Plot marginal effects 

af_onset <-  sjPlot::plot_model(mod.1, type = "eff",
                                terms = c("v2xca_academ_lag[all]"),
                                vcov.fun = "vcovCL", 
                                vcov.type = "HC1",
                                vcov.args = "cluster = vdem_subset_v11_noNA$country_id",
                                alpha = 0.3) + 
  ylab("P(autocratization onset)") + xlab("Academic Freedom Index") + theme_bw() +  scale_y_continuous(limits = c(0,0.2), breaks = seq(0,0.5,0.1)) +
  scale_x_continuous(limits = c(0, 1), breaks = seq(0,1,0.2)) + ggtitle("") +
  geom_rug(data = vdem_subset_v11_noNA, aes(x = v2xca_academ_lag, y = 0), alpha = .1, position = "jitter", sides = "b")

af_onset


democracy_stock_onset <-  sjPlot::plot_model(mod.1, type = "eff",
                                 terms = c("democracy_stock_lag[all]"),
                                 vcov.fun = "vcovCL", 
                                 vcov.type = "HC1",
                                 vcov.args = "cluster = vdem_subset_v11_noNA$country_id",
                                 alpha = 0.3) + 
  ylab("P(autocratization onset)") + xlab("Democracy Stock") + theme_bw() +  scale_y_continuous(limits = c(0,0.5), breaks = seq(0,0.5,0.1)) +
  scale_x_continuous(limits = c(0, 0.7), breaks = seq(0,1,0.2)) + ggtitle("") +
  geom_rug(data = vdem_subset_v11_noNA, aes(x = democracy_stock_lag, y = 0), alpha = .1, position = "jitter", sides = "b")

democracy_stock_onset

af_edi_onset_1 <-  sjPlot::plot_model(mod.2, type = "eff",
                                      terms = c("democracy_stock_lag[all]", "v2xca_academ_lag[0.1, 0.5, 0.9]"),
                                      vcov.fun = "vcovCL", 
                                      vcov.type = "HC1",
                                      vcov.args = "cluster = vdem_subset_v11_noNA$country_id",
                                      alpha = 0.3) + 
  ylab("P(autocratization onset)") + xlab("Electoral Democracy Index") + theme_bw() +  scale_y_continuous(limits = c(0,1), breaks = seq(0,1,0.2)) +
  scale_x_continuous(limits = c(0, 1), breaks = seq(0,1,0.2)) + ggtitle("") +
  geom_rug(data = vdem_subset_v11_noNA, aes(x = democracy_stock_lag, y = 0), alpha = .1, position = "jitter", sides = "b")

af_edi_onset_1 <- data.frame(x = af_edi_onset_1$data$x,
                             academic_freedom = af_edi_onset_1$data$group,
                             predicted = af_edi_onset_1$data$predicted,
                             lower = af_edi_onset_1$data$conf.low,
                             upper = af_edi_onset_1$data$conf.high)

af_edi_onset_1_plot <-ggplot(data = af_edi_onset_1) +
  geom_ribbon(aes(x = x, y = predicted, fill = academic_freedom, color = academic_freedom, group = academic_freedom, ymin = lower, ymax = upper), alpha = 0.1) +
  geom_line(aes(x = x, y = predicted, color = academic_freedom, group = academic_freedom), size = 1.05) +
  ylab("P(autocratization onset)") + xlab("Democracy Stock") + theme_bw() + scale_y_continuous(limits = c(0,1), breaks = seq(0,1,0.2)) +
  labs(color = "Academic Freedom\nIndex", fill =  "Academic Freedom\nIndex") +
  scale_color_okabe_ito(order = c(6,3,5)) +
  scale_fill_okabe_ito(order = c(6,3,5))+
  scale_x_continuous(limits = c(0,1), breaks = seq(0,1,0.20)) +
  geom_rug(data = vdem_subset_v11_noNA, aes(x = democracy_stock_lag, y = 0), alpha = .1, position = "jitter", sides = "b")


af_edi_onset_2 <-  sjPlot::plot_model(mod.2, type = "eff",
                                      terms = c("v2xca_academ_lag[all]", "democracy_stock_lag[0.1, 0.4, 0.6]"),
                                      vcov.fun = "vcovCL", 
                                      vcov.type = "HC1",
                                      vcov.args = "cluster = vdem_subset_v12_noNA$country_id",
                                      alpha = 0.3) + 
  ylab("P(autocratization onset)") + xlab("Academic Freedom Index") + theme_bw() +  scale_y_continuous(limits = c(0,1), breaks = seq(0,1,0.2)) +
  scale_x_continuous(limits = c(0, 1), breaks = seq(0,1,0.2)) + ggtitle("") +
  geom_rug(data = vdem_subset_v11_noNA, aes(x = v2xca_academ_lag, y = 0), alpha = .1, position = "jitter", sides = "b")

af_edi_onset_2 <- data.frame(x = af_edi_onset_2$data$x,
                             democracy_stock_lag = af_edi_onset_2$data$group,
                             predicted = af_edi_onset_2$data$predicted,
                             lower = af_edi_onset_2$data$conf.low,
                             upper = af_edi_onset_2$data$conf.high)

af_edi_onset_2_plot <-ggplot(data = af_edi_onset_2) +
  geom_ribbon(aes(x = x, y = predicted, fill = democracy_stock_lag, color = democracy_stock_lag, group = democracy_stock_lag, ymin = lower, ymax = upper), alpha = 0.1) +
  geom_line(aes(x = x, y = predicted, color = democracy_stock_lag, group = democracy_stock_lag), size = 1.05) +
  ylab("P(autocratization onset)") + xlab("Academic Freedom Index") + theme_bw() + scale_y_continuous(limits = c(0,1), breaks = seq(0,1,0.2)) +
  labs(color = "Democracy Stock", fill =  "Democracy Stock") +
  scale_color_okabe_ito(order = c(6,3,5)) +
  scale_fill_okabe_ito(order = c(6,3,5))+
  scale_x_continuous(limits = c(0,1), breaks = seq(0,1,0.20)) +
  geom_rug(data = vdem_subset_v11_noNA, aes(x = v2xca_academ_lag, y = 0), alpha = .1, position = "jitter", sides = "b")

figure <- ggpubr::ggarrange(af_onset, democracy_stock_onset,af_edi_onset_1_plot, af_edi_onset_2_plot,
                            nrow = 2, ncol = 2, legend = "bottom", 
                            labels = c("A","B", "C", "D"))

ggsave("figures/Figure_C2_Appendix.pdf", figure, width = 28, height = 30, units = "cm", dpi = 900, device = "pdf")


#--------------------------------------------------#
#  Missings data imputation        #
#--------------------------------------------------#

# clear workspace
rm(list=ls())

#---------------------------------------------------#
#--------------------load data ---------------------#
#---------------------------------------------------#

#setwd("P:/PIPM/7. VWS Projekt Academic Freedom Index/replication_files_autocratization_and_academic_freedom")

# load country_aggregated data

vdem_full_v11 <- readRDS("data/vdem11/Country_Year_V-Dem_Full+others_R_v11/V-Dem-CY-Full+Others-v11.rds")
vdem_full_v12 <- readRDS("data/vdem12/Country_Year_V-Dem_Full+others_R_v12/V-Dem-CY-Full+Others-v12.rds")
academic_freedom_four_data <-  readRDS("data/academic_freedom_index_cy.rds")

#---------------------------------------------------#
#--------------------subset data--------------------#
#---------------------------------------------------#

vdem_subset_v11 <- vdem_full_v11 %>%
  dplyr::select(country_id, year, COWcode, country_name, country_text_id, v2x_polyarchy, v2x_libdem,
                v2caviol, v2caassemb, v2cagenmob, v2caconmob, v2cademmob, v2caautmob, v2cauni, 
                v2canuni, v2caprotac, v2cafres, v2cafexch, v2cainsaut, v2casurv, v2clacfree, e_regionpol_6C,
                v2xca_academ, e_pt_coup, e_regionpol, v2x_regime, v2xlg_legcon, v2x_jucon, v2cauni)

vdem_full_v12 <- vdem_full_v12 %>%
  dplyr::select(country_id, year, e_gdppc, e_pop)

vdem_subset_v11 <- vdem_subset_v11 %>%
  left_join(vdem_full_v12, by = c("country_id", "year"))

vdem_subset_v11 <- vdem_subset_v11 %>%
  left_join(academic_freedom_four_data, by = c("country_text_id", "year")) %>%
  mutate(diff_v2xca_academ = v2xca_academ_four - v2xca_academ )

summary(vdem_subset_v11$diff_v2xca_academ)


## Rename Variables ##

vdem_subset_v11 <- vdem_subset_v11 %>%
  rename(v2xca_academ_five = v2xca_academ, 
         v2xca_academ = v2xca_academ_four)

#---------------------------------------------------#
#--------------------Load ERT data------------------#
#---------------------------------------------------#

ert_data <- read.csv("data/ERT-3.1/inst/ERT.csv")

ert_data <- ert_data %>%
  group_by(dem_ep_id) %>%
  mutate(last_v2x_polyarchy = last(v2x_polyarchy), 
         first_v2x_polyarchy = first(v2x_polyarchy)) %>%
  ungroup() %>%
  mutate(v2x_polyarchy_increase = last_v2x_polyarchy - first_v2x_polyarchy)

ert_data <- ert_data %>%
  group_by(aut_ep_id) %>%
  mutate(last_v2x_polyarchy = last(v2x_polyarchy), 
         first_v2x_polyarchy = first(v2x_polyarchy)) %>%
  ungroup() %>%
  mutate(v2x_polyarchy_decrease = last_v2x_polyarchy - first_v2x_polyarchy) %>%
  dplyr::select(-c(last_v2x_polyarchy, first_v2x_polyarchy))

ert_data <- ert_data %>%
  mutate(v2x_polyarchy_increase = ifelse(is.na(dem_ep_id), NA,v2x_polyarchy_increase )) %>%
  mutate(v2x_polyarchy_decrease = ifelse(is.na(aut_ep_id), NA,v2x_polyarchy_decrease ))

ert_data <- ert_data %>%
  dplyr::select(-c(country_text_id, country_name, v2x_regime:v2x_polyarchy_codehigh))

vdem_subset_v11 <- vdem_subset_v11 %>%
  left_join(ert_data, by =c("country_id", "year"))


vdem_subset_v11 <- vdem_subset_v11 %>%
  group_by(e_regionpol, year) %>%
  mutate(regionpol_EDI = mean(v2x_polyarchy, na.rm = TRUE)) %>%
  ungroup() %>%
  group_by(country_id) %>%
  mutate(e_gdppc_growth = ((e_gdppc - dplyr::lag(e_gdppc))/dplyr::lag(e_gdppc))*100) %>%
  mutate(e_gdppc_growth_roll5 = rollmean(e_gdppc_growth, k = 5, fill = NA)) %>%
  ungroup() %>%
  group_by(aut_ep_id) %>%
  mutate(aut_duration =  year- first(year), 
         aut_duration = ifelse(is.na(aut_ep_id), 0, aut_duration)) %>%
  group_by(country_id) %>%
  mutate(group_id = cumsum(aut_ep != lag(aut_ep, default = FALSE))) %>% # group_id for each autocra_period
  group_by(country_id, group_id) %>%
  mutate(spell = year-first(year), 
         spell = ifelse(is.na(aut_ep_id), spell, 0))


#---------------------------------------------------#
#------------Democracy Stock Variable---------------#
#---------------------------------------------------#


decay_sum <- function(tm, vl, decay) {
  last_time <- 0
  current_sum <- 0
  sums <- numeric(length(vl))
  ldecay <- (1-decay)
  for (i in 1:length(vl)) {
    delta <- as.numeric(tm[i] - last_time)
    current_sum <- current_sum * (ldecay * delta) + vl[i]
    last_time <- tm[i]
    sums[i] <- current_sum
  }
  sums
}

annual_depricition_function  <- function(year, var, decay) {
  last_time <- first(year)
  current_sum <- 0
  sums <- numeric(length(var))
  ldecay <- (1-decay)
  for (i in 1:length(var)) {
    delta <- as.numeric(year[i] - last_time)
    current_sum <- ldecay * (current_sum + decay * var[i])
    last_time <- year[i]
    sums[i] <- current_sum
  }
  sums
}


## Functions ##

na_interpolation2 <- function(x, option = "linear", maxgap) {
  library(imputeTS)
  library(dplyr)
  
  total_not_missing <- sum(!is.na(x))
  
  # check there is sufficient data for na_interpolation 
  if(total_not_missing < 2) {x} 
  
  else
    
    # replace takes an input vector, a T/F vector & replacement value
  {replace(
    # input vector is interpolated data
    # this will impute leading/lagging NAs which we don't want 
    imputeTS::na_interpolation(x, option = option, maxgap = maxgap), 
    
    # create T/F vector for NAs,  
    is.na(na.approx(x, na.rm = FALSE)), 
    
    # replace TRUE with NA in input vector  
    NA) 
  }
}


vdem_subset_v11 <- vdem_subset_v11 %>%
  group_by(country_id) %>%
  mutate(v2x_polyarchy_interpolated = na_interpolation2(v2x_polyarchy, option = "linear", maxgap = 5)) %>%
  drop_na(v2x_polyarchy_interpolated) %>%
  mutate(democracy_stock = annual_depricition_function(year, v2x_polyarchy_interpolated, 0.01)) %>%
  dplyr::select(country_id, country_name, year, v2x_polyarchy, democracy_stock, everything())

summary(vdem_subset_v11$democracy_stock)

#---------------------------------------------------#
#-----------------Statistical Analysis--------------#
#---------------------------------------------------#

## lags for independent variables ##

vdem_subset_v11 <- vdem_subset_v11 %>%
  mutate(e_gdppclog = log(e_gdppc+1), 
         e_poplog = log(e_pop +1), 
         year_1900 = year -1900) %>%
  group_by(country_id) %>%
  mutate(v2xca_academ_lag = lag(v2xca_academ), 
         v2cademmob_lag = lag(v2cademmob), 
         e_gdppclog_lag = lag(e_gdppclog), 
         e_gdppc_growth_roll5_lag = lag(e_gdppc_growth_roll5), 
         regionpol_EDI_lag = lag(regionpol_EDI), 
         e_poplog_lag = lag(e_poplog), 
         v2x_polyarchy_lag = lag(v2x_polyarchy), 
         v2x_polyarchy_lag_squared = v2x_polyarchy_lag^2, 
         v2x_jucon_lag = lag(v2x_jucon), 
         v2xlg_legcon_lag = lag(v2xlg_legcon), 
         democracy_stock_lag = lag(democracy_stock))

vdem_subset_v11 <- vdem_subset_v11 %>%
  mutate(region_factor = as.factor(e_regionpol_6C),
         region_factor = relevel(region_factor, ref=1), 
         spell = spell/100,
         spell_sq = spell^2/100,
         spell_cb = spell^3/100,
         year_sq = year_1900^2, 
         v2xca_academ_lag_sq = v2xca_academ_lag^2)


## drop NA ## 
vdem_subset_v11 <- vdem_subset_v11 %>%
  ungroup() %>%
  filter(year >=1900) %>%
  dplyr::select(-aut_ep_id) %>%
  mutate(aut_ep_stage2 = ifelse(aut_ep_outcome %in% c(1,5), 1, 0)) 

## setting ongoing events to NA as recommend by McGrath (2015)

vdem_subset_v11 <- vdem_subset_v11 %>%
  group_by(country_id) %>%
  mutate(aut_ep_onset = ifelse(aut_ep==1 & lag(aut_ep)== 1, NA, aut_ep)) %>%
  dplyr::select(country_id, year, aut_ep, aut_ep_onset, everything()) %>%
  drop_na(aut_ep_onset)

summary(vdem_subset_v11$aut_ep_onset) # 1.67 % of all country-years
table(vdem_subset_v11$aut_ep_onset) # 239 autocratization onsets 

##### Recoding onset of an autocratization episode as 0 and stable regime as 1 ####

vdem_subset_v11$aut_ep_onset_rev <- recode(vdem_subset_v11$aut_ep_onset, `1` = 0, `0` = 1)
table(vdem_subset_v11$aut_ep_onset_rev)
table(vdem_subset_v11$aut_ep_onset)


vdem_subset_v11 %>%
  filter(aut_ep_onset == 1) %>%
  distinct(country_id)

### Delete country-years without a university ###

vdem_subset_v11_noNA <- vdem_subset_v11 %>%
  filter(v2cauni == 1)

#### AMELIA IMPUTATION ####

vdem_subset_v11_noNA_amelia <- vdem_subset_v11_noNA %>%
  dplyr::select(country_id, year, aut_ep_onset, v2xca_academ_lag, democracy_stock_lag,
                e_gdppclog_lag, e_gdppc_growth_roll5_lag, e_poplog_lag, regionpol_EDI_lag, 
                v2x_jucon_lag, v2xlg_legcon_lag, region_factor, year_1900, year_sq)

class(vdem_subset_v11_noNA_amelia)
vdem_subset_v11_noNA_amelia <- as.data.frame(vdem_subset_v11_noNA_amelia)

a.data_amelia <- amelia(vdem_subset_v11_noNA_amelia, 
                              ts= "year", cs = "country_id", 
                              idvars=c("aut_ep_onset", "region_factor", "year_1900"), m=10, 
                              p2s=0)
summary(a.data_amelia)
summary(a.data_amelia$imputations[[1]])


#### Analysis ####

## Loading necessary packages ##
library(tidyverse)
library(Amelia)
library(broom)
library(brglm2)

## Build one data frame for regression analysis ##

all_imputations <- bind_rows(unclass(a.data_amelia$imputations), .id = "m") %>%
  group_by(m) %>%
  nest()

all_imputations

## Model 1

v1 <- all_imputations %>%
  mutate(model = data %>% map(~brglm(as.factor(aut_ep_onset) ~  poly(v2xca_academ_lag, degree = 2) + democracy_stock_lag +
                                              e_gdppclog_lag + e_gdppc_growth_roll5_lag +  e_poplog_lag +
                                              regionpol_EDI_lag + v2x_jucon_lag + v2xlg_legcon_lag +
                                              region_factor +
                                              year_1900 + year_sq, 
                                            data = ., family = binomial(link = "probit"),
                                            method = "brglm.fit", control.brglm = brglm.control(br.maxit = 1000, br.trace = T),
                                            pl=T)), 
         tidied = model %>% map(~ tidy(., conf.int = FALSE)),
         glance = model %>% map(~ glance(.)))
v1

#Inspect the data for imputation dataset 1
v1 %>%
  filter(m == "imp1") %>%
  unnest(tidied)

# Create a wide data frame of just the coefficients and standard errors
v1_par <- v1 %>%
  unnest(tidied) %>%
  dplyr::select(m, term, estimate, std.error) %>%
  gather(key, value, estimate, std.error) %>%
  group_by(term, m) %>%
  mutate(obs = row_number()) %>%
  spread(term, value) 

v1_par <- v1_par %>%
  dplyr::select(-obs)

v1_par

# Extract the coefficients
v1_coefs <- v1_par %>%
  filter(key == "estimate") %>%
  ungroup() %>%
  dplyr::select(-m, -key)
v1_coefs

# Extract the standard errors
v1_ses <- v1_par %>%
  filter(key == "std.error") %>%
  ungroup() %>%
  dplyr::select(-m, -key)


# Mi meld function Amelia
v1_melded <- mi.meld(v1_coefs, v1_ses)
v1_melded 

# Regression coefficient table
v1_model_dg <- v1 %>%
  unnest(glance) %>%
  filter(m == "imp1") %>%
  pull(df.residual)

v1_table <- as.data.frame(cbind(t(v1_melded$q.mi),
                                t(v1_melded$se.mi))) %>%
  magrittr::set_colnames(c("estimate", "std.error")) %>%
  mutate(term = rownames(.)) %>%
  dplyr::select(term, everything(.)) %>%
  mutate(statistic = estimate / std.error,
         conf.low = estimate + std.error * qt(0.025, v1_model_dg),
         conf.high = estimate + std.error * qt(0.975, v1_model_dg),
         p.value = 2 * pt(abs(statistic), v1_model_dg, lower.tail = FALSE))

v1_table

v1_data <- v1_table %>%
  drop_na()


## Model 2

v2 <- all_imputations %>%
  mutate(model = data %>% map(~brglm(as.factor(aut_ep_onset) ~  poly(v2xca_academ_lag, degree = 2) + democracy_stock_lag +
                                        v2xca_academ_lag:democracy_stock_lag +
                                        e_gdppclog_lag + e_gdppc_growth_roll5_lag +  e_poplog_lag +
                                        regionpol_EDI_lag + v2x_jucon_lag + v2xlg_legcon_lag +
                                        region_factor +
                                        year_1900 + year_sq, 
                                      data = ., family = binomial(link = "probit"),
                                      method = "brglm.fit", control.brglm = brglm.control(br.maxit = 1000, br.trace = T),
                                      pl=T)),
         tidied = model %>% map(~ tidy(., conf.int = FALSE)),
         glance = model %>% map(~ glance(.)))
v2


#Inspect the data for imputation dataset 1
v2 %>%
  filter(m == "imp1") %>%
  unnest(tidied)

# Create a wide data frame of just the coefficients and standard errors
v2_par <- v2 %>%
  unnest(tidied) %>%
  dplyr::select(m, term, estimate, std.error) %>%
  gather(key, value, estimate, std.error) %>%
  group_by(term, m) %>%
  mutate(obs = row_number()) %>%
  spread(term, value) 

v2_par <- v2_par %>%
  dplyr::select(-obs)

v2_par

# Extract the coefficients
v2_coefs <- v2_par %>%
  filter(key == "estimate") %>%
  ungroup() %>%
  dplyr::select(-m, -key)
v2_coefs

# Extract the standard errors
v2_ses <- v2_par %>%
  filter(key == "std.error") %>%
  ungroup() %>%
  dplyr::select(-m, -key)


# Mi meld function Amelia
v2_melded <- mi.meld(v2_coefs, v2_ses)
v2_melded 

# Regression coefficient table
v2_model_dg <- v2 %>%
  unnest(glance) %>%
  filter(m == "imp1") %>%
  pull(df.residual)

v2_table <- as.data.frame(cbind(t(v2_melded$q.mi),
                                t(v2_melded$se.mi))) %>%
  magrittr::set_colnames(c("estimate", "std.error")) %>%
  mutate(term = rownames(.)) %>%
  dplyr::select(term, everything(.)) %>%
  mutate(statistic = estimate / std.error,
         conf.low = estimate + std.error * qt(0.025, v2_model_dg),
         conf.high = estimate + std.error * qt(0.975, v2_model_dg),
         p.value = 2 * pt(abs(statistic), v2_model_dg, lower.tail = FALSE))

v2_table

v2_data <- v2_table %>%
  drop_na()

## Extract Texreg Table ##

extract_broom <- function(tidy_model, glance_model) {
  # get estimates/standard errors from tidy
  coef <- tidy_model$estimate
  coef.names <- as.character(tidy_model$term)
  se <- tidy_model$std.error
  pvalues <- tidy_model$p.value
  # get goodness-of-fit statistics from glance
  glance_transposed <- as_tibble(cbind(name = names(glance_model), t(glance_model)))
  gof.names <- as.character(glance_transposed$name)
  gof <- as.double(glance_transposed$value)
  gof.decimal <- gof %% 1 > 0
  tr_object <- texreg::createTexreg(coef.names = coef.names,
                                    coef = coef,
                                    se = se,
                                    pvalues = pvalues,
                                    gof.names = gof.names,
                                    gof = gof,
                                    gof.decimal = gof.decimal)
  return(tr_object)
}

extract_broom <- function(tidy_model, glance_model) {
  # get estimates/standard errors from tidy
  coef <- tidy_model$estimate
  coef.names <- as.character(tidy_model$term)
  se <- tidy_model$std.error
  pvalues <- tidy_model$p.value
  tr_object <- texreg::createTexreg(coef.names = coef.names,
                                    coef = coef,
                                    se = se,
                                    pvalues = pvalues)
  return(tr_object)
}

v1_tex <- extract_broom(v1_data)
v2_tex <- extract_broom(v2_data)

library(texreg)
texreg(list(v1_tex, v2_tex), 
       head.tag = TRUE, body.tag = TRUE,
       digits = 3, ci.force = TRUE, 
       custom.coef.names = c("Intercept", 
                             "Democratic Stock", 
                             "GDP growth", 
                             "GDP pc log", 
                             "Population log", 
                             "Academic Freedom", 
                             "Academic Freedom ^2", 
                             "Eastern Europe and Central Asia", 
                             "MENA", 
                             "Subsaharan Africa", 
                             "Western Europe and North America", 
                             "Asia and Pacific", 
                             "Regional democracy levels",
                             "Judicial constraints on executive", 
                             "Legislative constraints on executive", 
                             "Year", 
                             "Year squared", 
                             "AFI * Democratic Stock"),
       booktabs = TRUE,
       use.packages = FALSE,
       caption = "Estimated effect of academic freedom on probability of autocratization onset (Multiple Imputation)",
       fontsize = "scriptsize",
       label = "table:C6")

####Manually fill additional information regression ####

## Prediting AIC; BIC; and Log Likelihood##

## Function written by Andrew Heiss 
##(https://www.andrewheiss.com/blog/2018/03/08/amelia-broom-huxtable/)

tidy.melded <- function(x, conf.int = FALSE, conf.level = 0.95) {
  # Get the df from one of the models
  model_degrees_freedom <- glance(x[[1]])$df.residual
  
  # Create matrices of the estimates and standard errors
  params <- data_frame(models = x) %>%
    mutate(m = 1:n(),
           tidied = models %>% map(tidy)) %>% 
    unnest(tidied) %>%
    dplyr::select(m, term, estimate, std.error) %>%
    gather(key, value, estimate, std.error) %>%
    mutate(term = fct_inorder(term)) %>%  # Order the terms so that spread() keeps them in order
    spread(term, value)
  
  just_coefs <- params %>% filter(key == "estimate") %>% dplyr::select(-m, -key)
  just_ses <- params %>% filter(key == "std.error") %>% dplyr::select(-m, -key)
  
  # Meld the coefficients with Rubin's rules
  coefs_melded <- mi.meld(just_coefs, just_ses)
  
  # Create tidy output
  output <- as.data.frame(cbind(t(coefs_melded$q.mi),
                                t(coefs_melded$se.mi))) %>%
    magrittr::set_colnames(c("estimate", "std.error")) %>%
    mutate(term = rownames(.)) %>%
    dplyr::select(term, everything()) %>%
    mutate(statistic = estimate / std.error,
           p.value = 2 * pt(abs(statistic), model_degrees_freedom, lower.tail = FALSE))
  
  # Add confidence intervals if needed
  if (conf.int & conf.level) {
    # Convert conf.level to tail values (0.025 when it's 0.95)
    a <- (1 - conf.level) / 2
    
    output <- output %>% 
      mutate(conf.low = estimate + std.error * qt(a, model_degrees_freedom),
             conf.high = estimate + std.error * qt((1 - a), model_degrees_freedom))
  }
  
  # tidy objects only have a data.frame class, not tbl_df or anything else
  class(output) <- "data.frame"
  output
}

glance.melded <- function(x) {
  # Because the properly melded parameters and the simple average of the
  # parameters of these models are roughly the same (see
  # https://www.andrewheiss.com/blog/2018/03/07/amelia-tidy-melding/), for the
  # sake of simplicty we just take the average here
  output <- data_frame(models = x) %>%
    mutate(glance = models %>% map(glance)) %>%
    unnest(glance) %>%
    summarize_at(vars(logLik, AIC, BIC),
                 funs(mean)) %>%
    mutate(m = as.integer(length(x)))
  
  # glance objects only have a data.frame class, not tbl_df or anything else
  class(output) <- "data.frame"
  output
}

nobs.melded <- function(x, ...) {
  # Take the number of observations from the first model
  nobs(x[[1]])
}

#### Extract BIC, AIV, Log-Lik and F form the imputed models ####

glance_melded(v1)
v1[[1]]$aic
v1_imputed[[1]]$null.deviance

v2_imputed[[1]]$aic
v2_imputed[[1]]$null.deviance


################################################################################
################################################################################

#### Analysis for Democracies ####

# clear workspace
rm(list=ls())


#---------------------------------------------------#
#--------------------load data ---------------------#
#---------------------------------------------------#

#setwd("P:/PIPM/7. VWS Projekt Academic Freedom Index/replication_files_autocratization_and_academic_freedom")

# load country_aggregated data

vdem_full_v11 <- readRDS("data/vdem11/Country_Year_V-Dem_Full+others_R_v11/V-Dem-CY-Full+Others-v11.rds")
vdem_full_v12 <- readRDS("data/vdem12/Country_Year_V-Dem_Full+others_R_v12/V-Dem-CY-Full+Others-v12.rds")
academic_freedom_four_data <-  readRDS("data/academic_freedom_index_cy.rds")

#---------------------------------------------------#
#--------------------subset data--------------------#
#---------------------------------------------------#

vdem_subset_v11 <- vdem_full_v11 %>%
  dplyr::select(country_id, year, COWcode, country_name, country_text_id, v2x_polyarchy, v2x_libdem,
                v2caviol, v2caassemb, v2cagenmob, v2caconmob, v2cademmob, v2caautmob, v2cauni, 
                v2canuni, v2caprotac, v2cafres, v2cafexch, v2cainsaut, v2casurv, v2clacfree, e_regionpol_6C,
                v2xca_academ, e_pt_coup, e_regionpol, v2x_regime, v2xlg_legcon, v2x_jucon)

vdem_full_v12 <- vdem_full_v12 %>%
  dplyr::select(country_id, year, e_gdppc, e_pop)

vdem_subset_v11 <- vdem_subset_v11 %>%
  left_join(vdem_full_v12, by = c("country_id", "year"))

vdem_subset_v11 <- vdem_subset_v11 %>%
  left_join(academic_freedom_four_data, by = c("country_text_id", "year")) %>%
  mutate(diff_v2xca_academ = v2xca_academ_four - v2xca_academ )

summary(vdem_subset_v11$diff_v2xca_academ)

vdem_subset_v11 %>% 
  filter(diff_v2xca_academ >= 0.1 | diff_v2xca_academ <= -0.1) %>%
  dplyr::select(country_name, year, v2xca_academ, v2xca_academ_four,diff_v2xca_academ)

## Rename Variables ##

vdem_subset_v11 <- vdem_subset_v11 %>%
  rename(v2xca_academ_five = v2xca_academ, 
         v2xca_academ = v2xca_academ_four)

#---------------------------------------------------#
#--------------------Load ERT data------------------#
#---------------------------------------------------#

ert_data <- read.csv("data/ERT-3.1/inst/ERT.csv")

ert_data <- ert_data %>%
  group_by(dem_ep_id) %>%
  mutate(last_v2x_polyarchy = last(v2x_polyarchy), 
         first_v2x_polyarchy = first(v2x_polyarchy)) %>%
  ungroup() %>%
  mutate(v2x_polyarchy_increase = last_v2x_polyarchy - first_v2x_polyarchy)


ert_data <- ert_data %>%
  group_by(aut_ep_id) %>%
  mutate(last_v2x_polyarchy = last(v2x_polyarchy), 
         first_v2x_polyarchy = first(v2x_polyarchy)) %>%
  ungroup() %>%
  mutate(v2x_polyarchy_decrease = last_v2x_polyarchy - first_v2x_polyarchy) %>%
  dplyr::select(-c(last_v2x_polyarchy, first_v2x_polyarchy))

ert_data <- ert_data %>%
  mutate(v2x_polyarchy_increase = ifelse(is.na(dem_ep_id), NA,v2x_polyarchy_increase )) %>%
  mutate(v2x_polyarchy_decrease = ifelse(is.na(aut_ep_id), NA,v2x_polyarchy_decrease ))

ert_data <- ert_data %>%
  dplyr::select(-c(country_text_id, country_name, v2x_regime:v2x_polyarchy_codehigh))

vdem_subset_v11 <- vdem_subset_v11 %>%
  left_join(ert_data, by =c("country_id", "year"))


vdem_subset_v11 <- vdem_subset_v11 %>%
  group_by(e_regionpol, year) %>%
  mutate(regionpol_EDI = mean(v2x_polyarchy, na.rm = TRUE)) %>%
  ungroup() %>%
  group_by(country_id) %>%
  mutate(e_gdppc_growth = ((e_gdppc - dplyr::lag(e_gdppc))/dplyr::lag(e_gdppc))*100) %>%
  mutate(e_gdppc_growth_roll5 = rollmean(e_gdppc_growth, k = 5, fill = NA)) %>%
  ungroup() %>%
  group_by(aut_ep_id) %>%
  mutate(aut_duration =  year- first(year), 
         aut_duration = ifelse(is.na(aut_ep_id), 0, aut_duration)) %>%
  group_by(country_id) %>%
  mutate(group_id = cumsum(aut_ep != lag(aut_ep, default = FALSE))) %>% # group_id for each autocra_period
  group_by(country_id, group_id) %>%
  mutate(spell = year-first(year), 
         spell = ifelse(is.na(aut_ep_id), spell, 0))


#---------------------------------------------------#
#------------Democracy Stock Variable---------------#
#---------------------------------------------------#


decay_sum <- function(tm, vl, decay) {
  last_time <- 0
  current_sum <- 0
  sums <- numeric(length(vl))
  ldecay <- (1-decay)
  for (i in 1:length(vl)) {
    delta <- as.numeric(tm[i] - last_time)
    current_sum <- current_sum * (ldecay * delta) + vl[i]
    last_time <- tm[i]
    sums[i] <- current_sum
  }
  sums
}

annual_depricition_function  <- function(year, var, decay) {
  last_time <- first(year)
  current_sum <- 0
  sums <- numeric(length(var))
  ldecay <- (1-decay)
  for (i in 1:length(var)) {
    delta <- as.numeric(year[i] - last_time)
    current_sum <- ldecay * (current_sum + decay * var[i])
    last_time <- year[i]
    sums[i] <- current_sum
  }
  sums
}


## Functions ##

na_interpolation2 <- function(x, option = "linear", maxgap) {
  library(imputeTS)
  library(dplyr)
  
  total_not_missing <- sum(!is.na(x))
  
  # check there is sufficient data for na_interpolation 
  if(total_not_missing < 2) {x} 
  
  else
    
    # replace takes an input vector, a T/F vector & replacement value
  {replace(
    # input vector is interpolated data
    # this will impute leading/lagging NAs which we don't want 
    imputeTS::na_interpolation(x, option = option, maxgap = maxgap), 
    
    # create T/F vector for NAs,  
    is.na(na.approx(x, na.rm = FALSE)), 
    
    # replace TRUE with NA in input vector  
    NA) 
  }
}


vdem_subset_v11 <- vdem_subset_v11 %>%
  group_by(country_id) %>%
  mutate(v2x_polyarchy_interpolated = na_interpolation2(v2x_polyarchy, option = "linear", maxgap = 5)) %>%
  drop_na(v2x_polyarchy_interpolated) %>%
  mutate(democracy_stock = annual_depricition_function(year, v2x_polyarchy_interpolated, 0.01)) %>%
  dplyr::select(country_id, country_name, year, v2x_polyarchy, democracy_stock, everything())

summary(vdem_subset_v11$democracy_stock)

#---------------------------------------------------#
#-----------------Statistical Analysis--------------#
#---------------------------------------------------#

## lags for independent variables ##

vdem_subset_v11_noNA <- vdem_subset_v11 %>%
  mutate(e_gdppclog = log(e_gdppc+1), 
         e_poplog = log(e_pop +1), 
         year_1900 = year -1900) %>%
  group_by(country_id) %>%
  mutate(v2xca_academ_lag = lag(v2xca_academ), 
         v2cademmob_lag = lag(v2cademmob), 
         e_gdppclog_lag = lag(e_gdppclog), 
         e_gdppc_growth_roll5_lag = lag(e_gdppc_growth_roll5), 
         regionpol_EDI_lag = lag(regionpol_EDI), 
         e_poplog_lag = lag(e_poplog), 
         v2x_polyarchy_lag = lag(v2x_polyarchy), 
         v2x_polyarchy_lag_squared = v2x_polyarchy_lag^2, 
         v2x_jucon_lag = lag(v2x_jucon), 
         v2xlg_legcon_lag = lag(v2xlg_legcon), 
         democracy_stock_lag = lag(democracy_stock))

vdem_subset_v11_noNA <- vdem_subset_v11_noNA %>%
  mutate(region_factor = as.factor(e_regionpol_6C),
         region_factor = relevel(region_factor, ref=1), 
         spell = spell/100,
         spell_sq = spell^2/100,
         spell_cb = spell^3/100,
         year_sq = year_1900^2, 
         v2xca_academ_lag_sq = v2xca_academ_lag^2)


## drop NA ## 
vdem_subset_v11_noNA <- vdem_subset_v11_noNA %>%
  ungroup() %>%
  filter(year >=1900) %>%
  dplyr::select(-aut_ep_id) %>%
  mutate(aut_ep_stage2 = ifelse(aut_ep_outcome %in% c(1,5), 1, 0)) %>%
  drop_na(v2xca_academ_lag, e_gdppclog, e_gdppc_growth_roll5_lag, regionpol_EDI_lag, 
          e_poplog,region_factor ,v2x_jucon_lag, v2xlg_legcon_lag, year_1900, spell, country_id, democracy_stock_lag) 

summary(vdem_subset_v11_noNA)

## setting ongoing events to NA as recommend by McGrath (2015)

vdem_subset_v11_noNA <- vdem_subset_v11_noNA %>%
  group_by(country_id) %>%
  mutate(aut_ep_onset = ifelse(aut_ep==1 & lag(aut_ep)== 1, NA, aut_ep)) %>%
  dplyr::select(country_id, year, aut_ep, aut_ep_onset, everything()) %>%
  drop_na(aut_ep_onset)

summary(vdem_subset_v11_noNA$aut_ep_onset) # 1.67 % of all country-years
table(vdem_subset_v11_noNA$aut_ep_onset) # 239 autocratization onsets 

# list of all onsets #

list_of_auto_onsets <- vdem_subset_v11_noNA %>%
  filter(aut_ep_onset == 1) %>%
  ungroup() %>%
  dplyr::select(country_name, year)

list_of_auto_onsets$year <- as.factor(list_of_auto_onsets$year)

## Filter the dataset for democratic country-years##

vdem_subset_v11_noNA_dem <- vdem_subset_v11_noNA %>%
  filter(v2x_regime>=2) # 4369 country-years

summary(vdem_subset_v11_noNA_dem$aut_ep_onset) # 1 % of all country-years
table(vdem_subset_v11_noNA_dem$aut_ep_onset) # 45 autocratization onsets 

#### Summary Statistics Table B2 Supplementary Appendix ####

vdem_descriptives <- vdem_subset_v11_noNA_dem %>%
  dplyr::select(year, aut_ep_onset, v2xca_academ_lag, democracy_stock_lag, e_gdppclog_lag, 
                e_gdppc_growth_roll5_lag, e_poplog_lag, regionpol_EDI_lag, 
                v2x_jucon_lag, v2xlg_legcon_lag)


##### Recoding onset of an autocratization episode as 0 and stable regime as 1 ####

vdem_subset_v11_noNA_dem$aut_ep_onset_rev <- recode(vdem_subset_v11_noNA_dem$aut_ep_onset, `1` = 0, `0` = 1)
table(vdem_subset_v11_noNA_dem$aut_ep_onset_rev)
table(vdem_subset_v11_noNA_dem$aut_ep_onset)


vdem_subset_v11_noNA_dem %>%
  filter(aut_ep_onset == 1) %>%
  distinct(country_id)

#---------------------------------------------------#
#--------------------- Main Models -----------------#
#---------------------------------------------------#

# Run model 1,
mod.1 <- brglm::brglm(as.factor(aut_ep_onset) ~  poly(v2xca_academ_lag, degree = 2) + democracy_stock_lag +
                        e_gdppclog_lag + e_gdppc_growth_roll5_lag +  e_poplog_lag +
                        regionpol_EDI_lag + v2x_jucon_lag + v2xlg_legcon_lag +
                        region_factor +
                        year_1900 + year_sq, 
                      data = vdem_subset_v11_noNA_dem, family = binomial(link = "probit"),
                      method = "brglm.fit", control.brglm = brglm.control(br.maxit = 1000, br.trace = T),
                      pl=T)
summary(mod.1)


#Clustered SEs
ses_mod.1 <-  coeftest(mod.1, vcov = vcovCL(mod.1, cluster = vdem_subset_v11_noNA_dem[, "country_id"]),
                       type = "HC1", fix = T, cadjust = T)
ses_mod.1

# Run model 1,
mod.2 <- brglm::brglm(as.factor(aut_ep_onset) ~  poly(v2xca_academ_lag, degree = 2) + democracy_stock_lag +
                        v2xca_academ_lag:democracy_stock_lag + e_gdppclog_lag +
                        e_gdppc_growth_roll5_lag +  e_poplog_lag +
                        regionpol_EDI_lag + v2x_jucon_lag + v2xlg_legcon_lag +
                        region_factor +
                        year_1900 + year_sq, 
                      data = vdem_subset_v11_noNA_dem, family = binomial(link = "probit"),
                      method = "brglm.fit", control.brglm = brglm.control(br.maxit = 1000, br.trace = T),
                      pl=T)
summary(mod.2)

#Clustered SEs
ses_mod.2 <-  coeftest(mod.2, vcov = vcovCL(mod.2, cluster = vdem_subset_v11_noNA_dem[, "country_id"]),
                       type = "HC1", fix = T, cadjust = T)
ses_mod.2

## standard errors and pvalues for 

standard.errors <- list(ses_mod.1[,2], ses_mod.2[,2])
p.values <- list(ses_mod.1[,4], ses_mod.2[,4])


modelsummary::modelsummary(list(mod.1, mod.2),
                           output = "results/TableC7.tex", 
                           fmt = 3,
                           vcov = "stata", cluster = "country_id", 
                           estimate  = c("{estimate}{stars}"),
                           statistic = 'conf.int', 
                           conf_level = .95, 
                           coef_rename = c("(Intercept)" = "Intercept", 
                                           "poly(v2xca_academ_lag, degree = 2)1" = "Academic Freedom", 
                                           "poly(v2xca_academ_lag, degree = 2)2" = "Academic Freedom ^2", 
                                           "democracy_stock_lag"  = "Democracy Stock", 
                                           "e_gdppclog_lag"  = "GDP pc log", 
                                           "e_gdppc_growth_roll5_lag" = "GDP growth", 
                                           "e_poplog_lag" = "Population log", 
                                           "regionpol_EDI_lag" = "Regional democracy levels", 
                                           "v2x_jucon_lag" =  "Judicial constraints on executive", 
                                           "v2xlg_legcon_lag" = "Legislative constraints on executive", 
                                           "region_factor5" = "Western Europe and North America", 
                                           "region_factor4" = "Subsaharan Africa", 
                                           "region_factor6" = "Asia and Pacific", 
                                           "region_factor1" = "Eastern Europe and Central Asia", 
                                           "region_factor3" = "MENA", 
                                           "year_1900" = "Year", 
                                           "year_sq" = "Year squared", 
                                           "democracy_stock_lag:v2xca_academ_lag" = "AFI * Democracy Stock"))


# Plot marginal effects 

af_onset <-  sjPlot::plot_model(mod.1, type = "eff",
                                terms = c("v2xca_academ_lag[all]"),
                                vcov.fun = "vcovCL", 
                                vcov.type = "HC1",
                                vcov.args = "cluster = vdem_subset_v11_noNA$country_id",
                                alpha = 0.3) + 
  ylab("P(autocratization onset)") + xlab("Academic Freedom Index") + theme_bw() +  scale_y_continuous(limits = c(0,0.2), breaks = seq(0,0.5,0.1)) +
  scale_x_continuous(limits = c(0, 1), breaks = seq(0,1,0.2)) + ggtitle("") +
  geom_rug(data = vdem_subset_v11_noNA_dem, aes(x = v2xca_academ_lag, y = 0), alpha = .1, position = "jitter", sides = "b")

af_onset


democracy_stock_onset <-  sjPlot::plot_model(mod.1, type = "eff",
                                             terms = c("democracy_stock_lag[all]"),
                                             vcov.fun = "vcovCL", 
                                             vcov.type = "HC1",
                                             vcov.args = "cluster = vdem_subset_v11_noNA$country_id",
                                             alpha = 0.3) + 
  ylab("P(autocratization onset)") + xlab("Democracy Stock") + theme_bw() +  scale_y_continuous(limits = c(0,0.5), breaks = seq(0,0.5,0.1)) +
  scale_x_continuous(limits = c(0, 0.7), breaks = seq(0,1,0.2)) + ggtitle("") +
  geom_rug(data = vdem_subset_v11_noNA_dem, aes(x = democracy_stock_lag, y = 0), alpha = .1, position = "jitter", sides = "b")

democracy_stock_onset

af_edi_onset_1 <-  sjPlot::plot_model(mod.2, type = "eff",
                                      terms = c("democracy_stock_lag[all]", "v2xca_academ_lag[0.1, 0.5, 0.9]"),
                                      vcov.fun = "vcovCL", 
                                      vcov.type = "HC1",
                                      vcov.args = "cluster = vdem_subset_v11_noNA$country_id",
                                      alpha = 0.3) + 
  ylab("P(autocratization onset)") + xlab("Electoral Democracy Index") + theme_bw() +  scale_y_continuous(limits = c(0,1), breaks = seq(0,1,0.2)) +
  scale_x_continuous(limits = c(0, 1), breaks = seq(0,1,0.2)) + ggtitle("") +
  geom_rug(data = vdem_subset_v11_noNA_dem, aes(x = democracy_stock_lag, y = 0), alpha = .1, position = "jitter", sides = "b")

af_edi_onset_1 <- data.frame(x = af_edi_onset_1$data$x,
                             academic_freedom = af_edi_onset_1$data$group,
                             predicted = af_edi_onset_1$data$predicted,
                             lower = af_edi_onset_1$data$conf.low,
                             upper = af_edi_onset_1$data$conf.high)

af_edi_onset_1_plot <-ggplot(data = af_edi_onset_1) +
  geom_ribbon(aes(x = x, y = predicted, fill = academic_freedom, color = academic_freedom, group = academic_freedom, ymin = lower, ymax = upper), alpha = 0.1) +
  geom_line(aes(x = x, y = predicted, color = academic_freedom, group = academic_freedom), size = 1.05) +
  ylab("P(autocratization onset)") + xlab("Democracy Stock") + theme_bw() + scale_y_continuous(limits = c(0,1), breaks = seq(0,1,0.2)) +
  labs(color = "Academic Freedom\nIndex", fill =  "Academic Freedom\nIndex") +
  scale_color_okabe_ito(order = c(6,3,5)) +
  scale_fill_okabe_ito(order = c(6,3,5))+
  scale_x_continuous(limits = c(0,1), breaks = seq(0,1,0.20)) +
  geom_rug(data = vdem_subset_v11_noNA_dem, aes(x = democracy_stock_lag, y = 0), alpha = .1, position = "jitter", sides = "b")


af_edi_onset_2 <-  sjPlot::plot_model(mod.2, type = "eff",
                                      terms = c("v2xca_academ_lag[all]", "democracy_stock_lag[0.1, 0.4, 0.6]"),
                                      vcov.fun = "vcovCL", 
                                      vcov.type = "HC1",
                                      vcov.args = "cluster = vdem_subset_v12_noNA$country_id",
                                      alpha = 0.3) + 
  ylab("P(autocratization onset)") + xlab("Academic Freedom Index") + theme_bw() +  scale_y_continuous(limits = c(0,1), breaks = seq(0,1,0.2)) +
  scale_x_continuous(limits = c(0, 1), breaks = seq(0,1,0.2)) + ggtitle("") +
  geom_rug(data = vdem_subset_v11_noNA_dem, aes(x = v2xca_academ_lag, y = 0), alpha = .1, position = "jitter", sides = "b")

af_edi_onset_2 <- data.frame(x = af_edi_onset_2$data$x,
                             democracy_stock_lag = af_edi_onset_2$data$group,
                             predicted = af_edi_onset_2$data$predicted,
                             lower = af_edi_onset_2$data$conf.low,
                             upper = af_edi_onset_2$data$conf.high)

af_edi_onset_2_plot <-ggplot(data = af_edi_onset_2) +
  geom_ribbon(aes(x = x, y = predicted, fill = democracy_stock_lag, color = democracy_stock_lag, group = democracy_stock_lag, ymin = lower, ymax = upper), alpha = 0.1) +
  geom_line(aes(x = x, y = predicted, color = democracy_stock_lag, group = democracy_stock_lag), size = 1.05) +
  ylab("P(autocratization onset)") + xlab("Academic Freedom Index") + theme_bw() + scale_y_continuous(limits = c(0,1), breaks = seq(0,1,0.2)) +
  labs(color = "Democracy Stock", fill =  "Democracy Stock") +
  scale_color_okabe_ito(order = c(6,3,5)) +
  scale_fill_okabe_ito(order = c(6,3,5))+
  scale_x_continuous(limits = c(0,1), breaks = seq(0,1,0.20)) +
  geom_rug(data = vdem_subset_v11_noNA_dem, aes(x = v2xca_academ_lag, y = 0), alpha = .1, position = "jitter", sides = "b")

figure <- ggpubr::ggarrange(af_onset, democracy_stock_onset,af_edi_onset_1_plot, af_edi_onset_2_plot,
                            nrow = 2, ncol = 2, legend = "bottom", 
                            labels = c("A","B", "C", "D"))
ggsave("figures/Figure_C3_Appendix_dem.pdf", figure, width = 28, height = 30, units = "cm", dpi = 900, device = "pdf")

###############################################################################
## Autocracies ##

## Filter the dataset for democratic country-years##

vdem_subset_v11_noNA_aut <- vdem_subset_v11_noNA %>%
  filter(v2x_regime<2) # 5538 country-years

summary(vdem_subset_v11_noNA_aut$aut_ep_onset) # 2.1 % of all country-years
table(vdem_subset_v11_noNA_aut$aut_ep_onset) # 117 autocratization onsets 

#### Summary Statistics Table B2 Supplementary Appendix ####

vdem_descriptives <- vdem_subset_v11_noNA_aut %>%
  dplyr::select(year, aut_ep_onset, v2xca_academ_lag, democracy_stock_lag, e_gdppclog_lag, 
                e_gdppc_growth_roll5_lag, e_poplog_lag, regionpol_EDI_lag, 
                v2x_jucon_lag, v2xlg_legcon_lag)


##### Recoding onset of an autocratization episode as 0 and stable regime as 1 ####

vdem_subset_v11_noNA_aut$aut_ep_onset_rev <- recode(vdem_subset_v11_noNA_aut$aut_ep_onset, `1` = 0, `0` = 1)
table(vdem_subset_v11_noNA_aut$aut_ep_onset_rev)
table(vdem_subset_v11_noNA_aut$aut_ep_onset)


vdem_subset_v11_noNA_aut %>%
  filter(aut_ep_onset == 1) %>%
  distinct(country_id)

#---------------------------------------------------#
#--------------------- Main Models -----------------#
#---------------------------------------------------#

# Run model 1,
mod.1 <- brglm::brglm(as.factor(aut_ep_onset) ~  poly(v2xca_academ_lag, degree = 2) + democracy_stock_lag +
                        e_gdppclog_lag + e_gdppc_growth_roll5_lag +  e_poplog_lag +
                        regionpol_EDI_lag + v2x_jucon_lag + v2xlg_legcon_lag +
                        region_factor +
                        year_1900 + year_sq, 
                      data = vdem_subset_v11_noNA_aut, family = binomial(link = "probit"),
                      method = "brglm.fit", control.brglm = brglm.control(br.maxit = 1000, br.trace = T),
                      pl=T)
summary(mod.1)


#Clustered SEs
ses_mod.1 <-  coeftest(mod.1, vcov = vcovCL(mod.1, cluster = vdem_subset_v11_noNA_aut[, "country_id"]),
                       type = "HC1", fix = T, cadjust = T)
ses_mod.1

# Run model 1,
mod.2 <- brglm::brglm(as.factor(aut_ep_onset) ~  poly(v2xca_academ_lag, degree = 2) + democracy_stock_lag +
                        v2xca_academ_lag:democracy_stock_lag + e_gdppclog_lag +
                        e_gdppc_growth_roll5_lag +  e_poplog_lag +
                        regionpol_EDI_lag + v2x_jucon_lag + v2xlg_legcon_lag +
                        region_factor +
                        year_1900 + year_sq, 
                      data = vdem_subset_v11_noNA_aut, family = binomial(link = "probit"),
                      method = "brglm.fit", control.brglm = brglm.control(br.maxit = 1000, br.trace = T),
                      pl=T)
summary(mod.2)

#Clustered SEs
ses_mod.2 <-  coeftest(mod.2, vcov = vcovCL(mod.2, cluster = vdem_subset_v11_noNA_aut[, "country_id"]),
                       type = "HC1", fix = T, cadjust = T)
ses_mod.2

## standard errors and pvalues for 

standard.errors <- list(ses_mod.1[,2], ses_mod.2[,2])
p.values <- list(ses_mod.1[,4], ses_mod.2[,4])


modelsummary::modelsummary(list(mod.1, mod.2),
                           output = "results/TableC8.tex", 
                           fmt = 3,
                           vcov = "stata", cluster = "country_id", 
                           estimate  = c("{estimate}{stars}"),
                           statistic = 'conf.int', 
                           conf_level = .95, 
                           coef_rename = c("(Intercept)" = "Intercept", 
                                           "poly(v2xca_academ_lag, degree = 2)1" = "Academic Freedom", 
                                           "poly(v2xca_academ_lag, degree = 2)2" = "Academic Freedom ^2", 
                                           "democracy_stock_lag"  = "Democracy Stock", 
                                           "e_gdppclog_lag"  = "GDP pc log", 
                                           "e_gdppc_growth_roll5_lag" = "GDP growth", 
                                           "e_poplog_lag" = "Population log", 
                                           "regionpol_EDI_lag" = "Regional democracy levels", 
                                           "v2x_jucon_lag" =  "Judicial constraints on executive", 
                                           "v2xlg_legcon_lag" = "Legislative constraints on executive", 
                                           "region_factor5" = "Western Europe and North America", 
                                           "region_factor4" = "Subsaharan Africa", 
                                           "region_factor6" = "Asia and Pacific", 
                                           "region_factor1" = "Eastern Europe and Central Asia", 
                                           "region_factor3" = "MENA", 
                                           "year_1900" = "Year", 
                                           "year_sq" = "Year squared", 
                                           "democracy_stock_lag:v2xca_academ_lag" = "AFI * Democracy Stock"))


# Plot marginal effects 

af_onset <-  sjPlot::plot_model(mod.1, type = "eff",
                                terms = c("v2xca_academ_lag[all]"),
                                vcov.fun = "vcovCL", 
                                vcov.type = "HC1",
                                vcov.args = "cluster = vdem_subset_v11_noNA$country_id",
                                alpha = 0.3) + 
  ylab("P(autocratization onset)") + xlab("Academic Freedom Index") + theme_bw() +  scale_y_continuous(limits = c(0,0.2), breaks = seq(0,0.5,0.1)) +
  scale_x_continuous(limits = c(0, 1), breaks = seq(0,1,0.2)) + ggtitle("") +
  geom_rug(data = vdem_subset_v11_noNA_aut, aes(x = v2xca_academ_lag, y = 0), alpha = .1, position = "jitter", sides = "b")

af_onset


democracy_stock_onset <-  sjPlot::plot_model(mod.1, type = "eff",
                                             terms = c("democracy_stock_lag[all]"),
                                             vcov.fun = "vcovCL", 
                                             vcov.type = "HC1",
                                             vcov.args = "cluster = vdem_subset_v11_noNA$country_id",
                                             alpha = 0.3) + 
  ylab("P(autocratization onset)") + xlab("Democracy Stock") + theme_bw() +  scale_y_continuous(limits = c(0,0.5), breaks = seq(0,0.5,0.1)) +
  scale_x_continuous(limits = c(0, 0.7), breaks = seq(0,1,0.2)) + ggtitle("") +
  geom_rug(data = vdem_subset_v11_noNA_aut, aes(x = democracy_stock_lag, y = 0), alpha = .1, position = "jitter", sides = "b")

democracy_stock_onset

af_edi_onset_1 <-  sjPlot::plot_model(mod.2, type = "eff",
                                      terms = c("democracy_stock_lag[all]", "v2xca_academ_lag[0.1, 0.5, 0.9]"),
                                      vcov.fun = "vcovCL", 
                                      vcov.type = "HC1",
                                      vcov.args = "cluster = vdem_subset_v11_noNA$country_id",
                                      alpha = 0.3) + 
  ylab("P(autocratization onset)") + xlab("Electoral Democracy Index") + theme_bw() +  scale_y_continuous(limits = c(0,1), breaks = seq(0,1,0.2)) +
  scale_x_continuous(limits = c(0, 1), breaks = seq(0,1,0.2)) + ggtitle("") +
  geom_rug(data = vdem_subset_v11_noNA_dem, aes(x = democracy_stock_lag, y = 0), alpha = .1, position = "jitter", sides = "b")

af_edi_onset_1 <- data.frame(x = af_edi_onset_1$data$x,
                             academic_freedom = af_edi_onset_1$data$group,
                             predicted = af_edi_onset_1$data$predicted,
                             lower = af_edi_onset_1$data$conf.low,
                             upper = af_edi_onset_1$data$conf.high)

af_edi_onset_1_plot <-ggplot(data = af_edi_onset_1) +
  geom_ribbon(aes(x = x, y = predicted, fill = academic_freedom, color = academic_freedom, group = academic_freedom, ymin = lower, ymax = upper), alpha = 0.1) +
  geom_line(aes(x = x, y = predicted, color = academic_freedom, group = academic_freedom), size = 1.05) +
  ylab("P(autocratization onset)") + xlab("Democracy Stock") + theme_bw() + scale_y_continuous(limits = c(0,1), breaks = seq(0,1,0.2)) +
  labs(color = "Academic Freedom\nIndex", fill =  "Academic Freedom\nIndex") +
  scale_color_okabe_ito(order = c(6,3,5)) +
  scale_fill_okabe_ito(order = c(6,3,5))+
  scale_x_continuous(limits = c(0,1), breaks = seq(0,1,0.20)) +
  geom_rug(data = vdem_subset_v11_noNA_aut, aes(x = democracy_stock_lag, y = 0), alpha = .1, position = "jitter", sides = "b")


af_edi_onset_2 <-  sjPlot::plot_model(mod.2, type = "eff",
                                      terms = c("v2xca_academ_lag[all]", "democracy_stock_lag[0.1, 0.4, 0.6]"),
                                      vcov.fun = "vcovCL", 
                                      vcov.type = "HC1",
                                      vcov.args = "cluster = vdem_subset_v12_noNA$country_id",
                                      alpha = 0.3) + 
  ylab("P(autocratization onset)") + xlab("Academic Freedom Index") + theme_bw() +  scale_y_continuous(limits = c(0,1), breaks = seq(0,1,0.2)) +
  scale_x_continuous(limits = c(0, 1), breaks = seq(0,1,0.2)) + ggtitle("") +
  geom_rug(data = vdem_subset_v11_noNA_aut, aes(x = v2xca_academ_lag, y = 0), alpha = .1, position = "jitter", sides = "b")

af_edi_onset_2 <- data.frame(x = af_edi_onset_2$data$x,
                             democracy_stock_lag = af_edi_onset_2$data$group,
                             predicted = af_edi_onset_2$data$predicted,
                             lower = af_edi_onset_2$data$conf.low,
                             upper = af_edi_onset_2$data$conf.high)

af_edi_onset_2_plot <-ggplot(data = af_edi_onset_2) +
  geom_ribbon(aes(x = x, y = predicted, fill = democracy_stock_lag, color = democracy_stock_lag, group = democracy_stock_lag, ymin = lower, ymax = upper), alpha = 0.1) +
  geom_line(aes(x = x, y = predicted, color = democracy_stock_lag, group = democracy_stock_lag), size = 1.05) +
  ylab("P(autocratization onset)") + xlab("Academic Freedom Index") + theme_bw() + scale_y_continuous(limits = c(0,1), breaks = seq(0,1,0.2)) +
  labs(color = "Democracy Stock", fill =  "Democracy Stock") +
  scale_color_okabe_ito(order = c(6,3,5)) +
  scale_fill_okabe_ito(order = c(6,3,5))+
  scale_x_continuous(limits = c(0,1), breaks = seq(0,1,0.20)) +
  geom_rug(data = vdem_subset_v11_noNA_aut, aes(x = v2xca_academ_lag, y = 0), alpha = .1, position = "jitter", sides = "b")

figure <- ggpubr::ggarrange(af_onset, democracy_stock_onset,af_edi_onset_1_plot, af_edi_onset_2_plot,
                            nrow = 2, ncol = 2, legend = "bottom", 
                            labels = c("A","B", "C", "D"))
ggsave("figures/Figure_C4_Appendix_aut.pdf", figure, width = 28, height = 30, units = "cm", dpi = 900, device = "pdf")



#---------------------------------------------------#
#--Democratic Stock Depreciation rate of 0.05 -------#
#---------------------------------------------------#

#load V-Dem full dataset
#load(here("data_out/vdem_full_v11.Rda"))

# set you working directory here #

#setwd("P:/PIPM/7. VWS Projekt Academic Freedom Index/replication_files_autocratization_and_academic_freedom")

# load country_aggregated data

vdem_full_v11 <- readRDS("data/vdem11/Country_Year_V-Dem_Full+others_R_v11/V-Dem-CY-Full+Others-v11.rds")
vdem_full_v12 <- readRDS("data/vdem12/Country_Year_V-Dem_Full+others_R_v12/V-Dem-CY-Full+Others-v12.rds")
academic_freedom_four_data <-  readRDS("data/academic_freedom_index_cy.rds")

#---------------------------------------------------#
#--------------------subset data--------------------#
#---------------------------------------------------#

vdem_subset_v11 <- vdem_full_v11 %>%
  dplyr::select(country_id, year, COWcode, country_name, country_text_id, v2x_polyarchy, v2x_libdem,
                v2caviol, v2caassemb, v2cagenmob, v2caconmob, v2cademmob, v2caautmob, v2cauni, 
                v2canuni, v2caprotac, v2cafres, v2cafexch, v2cainsaut, v2casurv, v2clacfree, e_regionpol_6C,
                v2xca_academ, e_pt_coup, e_regionpol, v2x_regime, v2xlg_legcon, v2x_jucon)

vdem_full_v12 <- vdem_full_v12 %>%
  dplyr::select(country_id, year, e_gdppc, e_pop)

vdem_subset_v11 <- vdem_subset_v11 %>%
  left_join(vdem_full_v12, by = c("country_id", "year"))

vdem_subset_v11 <- vdem_subset_v11 %>%
  left_join(academic_freedom_four_data, by = c("country_text_id", "year")) %>%
  mutate(diff_v2xca_academ = v2xca_academ_four - v2xca_academ )

summary(vdem_subset_v11$diff_v2xca_academ)

vdem_subset_v11 %>% 
  filter(diff_v2xca_academ >= 0.1 | diff_v2xca_academ <= -0.1) %>%
  dplyr::select(country_name, year, v2xca_academ, v2xca_academ_four,diff_v2xca_academ)

## Rename Variables ##

vdem_subset_v11 <- vdem_subset_v11 %>%
  rename(v2xca_academ_five = v2xca_academ, 
         v2xca_academ = v2xca_academ_four)

#---------------------------------------------------#
#--------------------Load ERT data------------------#
#---------------------------------------------------#

ert_data <- read.csv("data/ERT-3.1/inst/ERT.csv")

ert_data <- ert_data %>%
  group_by(dem_ep_id) %>%
  mutate(last_v2x_polyarchy = last(v2x_polyarchy), 
         first_v2x_polyarchy = first(v2x_polyarchy)) %>%
  ungroup() %>%
  mutate(v2x_polyarchy_increase = last_v2x_polyarchy - first_v2x_polyarchy)


ert_data <- ert_data %>%
  group_by(aut_ep_id) %>%
  mutate(last_v2x_polyarchy = last(v2x_polyarchy), 
         first_v2x_polyarchy = first(v2x_polyarchy)) %>%
  ungroup() %>%
  mutate(v2x_polyarchy_decrease = last_v2x_polyarchy - first_v2x_polyarchy) %>%
  dplyr::select(-c(last_v2x_polyarchy, first_v2x_polyarchy))

ert_data <- ert_data %>%
  mutate(v2x_polyarchy_increase = ifelse(is.na(dem_ep_id), NA,v2x_polyarchy_increase )) %>%
  mutate(v2x_polyarchy_decrease = ifelse(is.na(aut_ep_id), NA,v2x_polyarchy_decrease ))

ert_data <- ert_data %>%
  dplyr::select(-c(country_text_id, country_name, v2x_regime:v2x_polyarchy_codehigh))

vdem_subset_v11 <- vdem_subset_v11 %>%
  left_join(ert_data, by =c("country_id", "year"))


vdem_subset_v11 <- vdem_subset_v11 %>%
  group_by(e_regionpol, year) %>%
  mutate(regionpol_EDI = mean(v2x_polyarchy, na.rm = TRUE)) %>%
  ungroup() %>%
  group_by(country_id) %>%
  mutate(e_gdppc_growth = ((e_gdppc - dplyr::lag(e_gdppc))/dplyr::lag(e_gdppc))*100) %>%
  mutate(e_gdppc_growth_roll5 = rollmean(e_gdppc_growth, k = 5, fill = NA)) %>%
  ungroup() %>%
  group_by(aut_ep_id) %>%
  mutate(aut_duration =  year- first(year), 
         aut_duration = ifelse(is.na(aut_ep_id), 0, aut_duration)) %>%
  group_by(country_id) %>%
  mutate(group_id = cumsum(aut_ep != lag(aut_ep, default = FALSE))) %>% # group_id for each autocra_period
  group_by(country_id, group_id) %>%
  mutate(spell = year-first(year), 
         spell = ifelse(is.na(aut_ep_id), spell, 0))


#---------------------------------------------------#
#------------Democracy Stock Variable---------------#
#---------------------------------------------------#


decay_sum <- function(tm, vl, decay) {
  last_time <- 0
  current_sum <- 0
  sums <- numeric(length(vl))
  ldecay <- (1-decay)
  for (i in 1:length(vl)) {
    delta <- as.numeric(tm[i] - last_time)
    current_sum <- current_sum * (ldecay * delta) + vl[i]
    last_time <- tm[i]
    sums[i] <- current_sum
  }
  sums
}

annual_depricition_function  <- function(year, var, decay) {
  last_time <- first(year)
  current_sum <- 0
  sums <- numeric(length(var))
  ldecay <- (1-decay)
  for (i in 1:length(var)) {
    delta <- as.numeric(year[i] - last_time)
    current_sum <- ldecay * (current_sum + decay * var[i])
    last_time <- year[i]
    sums[i] <- current_sum
  }
  sums
}


## Functions ##

na_interpolation2 <- function(x, option = "linear", maxgap) {
  library(imputeTS)
  library(dplyr)
  
  total_not_missing <- sum(!is.na(x))
  
  # check there is sufficient data for na_interpolation 
  if(total_not_missing < 2) {x} 
  
  else
    
    # replace takes an input vector, a T/F vector & replacement value
  {replace(
    # input vector is interpolated data
    # this will impute leading/lagging NAs which we don't want 
    imputeTS::na_interpolation(x, option = option, maxgap = maxgap), 
    
    # create T/F vector for NAs,  
    is.na(na.approx(x, na.rm = FALSE)), 
    
    # replace TRUE with NA in input vector  
    NA) 
  }
}


vdem_subset_v11 <- vdem_subset_v11 %>%
  group_by(country_id) %>%
  mutate(v2x_polyarchy_interpolated = na_interpolation2(v2x_polyarchy, option = "linear", maxgap = 5)) %>%
  drop_na(v2x_polyarchy_interpolated) %>%
  mutate(democracy_stock = annual_depricition_function(year, v2x_polyarchy_interpolated, 0.05)) %>%
  dplyr::select(country_id, country_name, year, v2x_polyarchy, democracy_stock, everything())

summary(vdem_subset_v11$democracy_stock)

ggplot(subset(vdem_subset_v11, country_name == "Brazil"), aes(x = year)) +
  geom_line(aes(y = v2x_polyarchy), color = "red") +
  geom_line(aes(y = democracy_stock), color = "blue") +
  ylim(0,1) +
  theme_clean()



#---------------------------------------------------#
#-----------------Statistical Analysis--------------#
#---------------------------------------------------#

## lags for independent variables ##

vdem_subset_v11_noNA <- vdem_subset_v11 %>%
  mutate(e_gdppclog = log(e_gdppc+1), 
         e_poplog = log(e_pop +1), 
         year_1900 = year -1900) %>%
  group_by(country_id) %>%
  mutate(v2xca_academ_lag = lag(v2xca_academ), 
         v2cademmob_lag = lag(v2cademmob), 
         e_gdppclog_lag = lag(e_gdppclog), 
         e_gdppc_growth_roll5_lag = lag(e_gdppc_growth_roll5), 
         regionpol_EDI_lag = lag(regionpol_EDI), 
         e_poplog_lag = lag(e_poplog), 
         v2x_polyarchy_lag = lag(v2x_polyarchy), 
         v2x_polyarchy_lag_squared = v2x_polyarchy_lag^2, 
         v2x_jucon_lag = lag(v2x_jucon), 
         v2xlg_legcon_lag = lag(v2xlg_legcon), 
         democracy_stock_lag = lag(democracy_stock))

vdem_subset_v11_noNA <- vdem_subset_v11_noNA %>%
  mutate(region_factor = as.factor(e_regionpol_6C),
         region_factor = relevel(region_factor, ref=1), 
         spell = spell/100,
         spell_sq = spell^2/100,
         spell_cb = spell^3/100,
         year_sq = year_1900^2, 
         v2xca_academ_lag_sq = v2xca_academ_lag^2)


## drop NA ## 
vdem_subset_v11_noNA <- vdem_subset_v11_noNA %>%
  ungroup() %>%
  filter(year >=1900) %>%
  dplyr::select(-aut_ep_id) %>%
  mutate(aut_ep_stage2 = ifelse(aut_ep_outcome %in% c(1,5), 1, 0)) %>%
  drop_na(v2xca_academ_lag, e_gdppclog, e_gdppc_growth_roll5_lag, regionpol_EDI_lag, 
          e_poplog,region_factor ,v2x_jucon_lag, v2xlg_legcon_lag, year_1900, spell, country_id, democracy_stock_lag) 

summary(vdem_subset_v11_noNA)

## setting ongoing events to NA as recommend by McGrath (2015)

vdem_subset_v11_noNA <- vdem_subset_v11_noNA %>%
  group_by(country_id) %>%
  mutate(aut_ep_onset = ifelse(aut_ep==1 & lag(aut_ep)== 1, NA, aut_ep)) %>%
  dplyr::select(country_id, year, aut_ep, aut_ep_onset, everything()) %>%
  drop_na(aut_ep_onset)

summary(vdem_subset_v11_noNA$aut_ep_onset) # 1.67 % of all country-years
table(vdem_subset_v11_noNA$aut_ep_onset) # 239 autocratization onsets 

# list of all onsets #

list_of_auto_onsets <- vdem_subset_v11_noNA %>%
  filter(aut_ep_onset == 1) %>%
  ungroup() %>%
  dplyr::select(country_name, year)

list_of_auto_onsets$year <- as.factor(list_of_auto_onsets$year)

#### compare number of autocratization onset in V-Dem sample ####

vdem_subset_v11 <- vdem_subset_v11 %>%
  filter(year >=1900) %>%
  group_by(country_id) %>%
  mutate(aut_ep_onset = ifelse(aut_ep==1 & lag(aut_ep)== 1, NA, aut_ep)) %>%
  dplyr::select(country_id, year, aut_ep, aut_ep_onset, everything()) %>%
  drop_na(aut_ep_onset)

summary(vdem_subset_v11$aut_ep_onset) # 1.67 % of all country-years
table(vdem_subset_v11$aut_ep_onset) # 360 autocratization onsets 

#### Summary Statistics Table B2 Supplementary Appendix ####

vdem_descriptives <- vdem_subset_v11_noNA %>%
  dplyr::select(year, aut_ep_onset, v2xca_academ_lag, democracy_stock_lag, e_gdppclog_lag, 
                e_gdppc_growth_roll5_lag, e_poplog_lag, regionpol_EDI_lag, 
                v2x_jucon_lag, v2xlg_legcon_lag)


##### Recoding onset of an autocratization episode as 0 and stable regime as 1 ####

vdem_subset_v11_noNA$aut_ep_onset_rev <- recode(vdem_subset_v11_noNA$aut_ep_onset, `1` = 0, `0` = 1)
table(vdem_subset_v11_noNA$aut_ep_onset_rev)
table(vdem_subset_v11_noNA$aut_ep_onset)


vdem_subset_v11_noNA %>%
  filter(aut_ep_onset == 1) %>%
  distinct(country_id)

#---------------------------------------------------#
#--------------------- Main Models -----------------#
#---------------------------------------------------#

# Run model 1,
mod.1 <- brglm::brglm(as.factor(aut_ep_onset) ~  poly(v2xca_academ_lag, degree = 2) + democracy_stock_lag +
                        e_gdppclog_lag + e_gdppc_growth_roll5_lag +  e_poplog_lag +
                        regionpol_EDI_lag + v2x_jucon_lag + v2xlg_legcon_lag +
                        region_factor +
                        year_1900 + year_sq, 
                      data = vdem_subset_v11_noNA, family = binomial(link = "probit"),
                      method = "brglm.fit", control.brglm = brglm.control(br.maxit = 1000, br.trace = T),
                      pl=T)
summary(mod.1)


#Clustered SEs
ses_mod.1 <-  coeftest(mod.1, vcov = vcovCL(mod.1, cluster = vdem_subset_v11_noNA[, "country_id"]),
                       type = "HC1", fix = T, cadjust = T)
ses_mod.1

# Run model 1,
mod.2 <- brglm::brglm(as.factor(aut_ep_onset) ~  poly(v2xca_academ_lag, degree = 2) + democracy_stock_lag +
                        v2xca_academ_lag:democracy_stock_lag + e_gdppclog_lag +
                        e_gdppc_growth_roll5_lag +  e_poplog_lag +
                        regionpol_EDI_lag + v2x_jucon_lag + v2xlg_legcon_lag +
                        region_factor +
                        year_1900 + year_sq, 
                      data = vdem_subset_v11_noNA, family = binomial(link = "probit"),
                      method = "brglm.fit", control.brglm = brglm.control(br.maxit = 1000, br.trace = T),
                      pl=T)
summary(mod.2)

#Clustered SEs
ses_mod.2 <-  coeftest(mod.2, vcov = vcovCL(mod.2, cluster = vdem_subset_v11_noNA[, "country_id"]),
                       type = "HC1", fix = T, cadjust = T)
ses_mod.2

## standard errors and pvalues for 

standard.errors <- list(ses_mod.1[,2], ses_mod.2[,2])
p.values <- list(ses_mod.1[,4], ses_mod.2[,4])


modelsummary::modelsummary(list(mod.1, mod.2),
                           output = "results/TableC9.tex", 
                           fmt = 3,
                           vcov = "stata", cluster = "country_id", 
                           estimate  = c("{estimate}{stars}"),
                           statistic = 'conf.int', 
                           conf_level = .95, 
                           coef_rename = c("(Intercept)" = "Intercept", 
                                           "poly(v2xca_academ_lag, degree = 2)1" = "Academic Freedom", 
                                           "poly(v2xca_academ_lag, degree = 2)2" = "Academic Freedom ^2", 
                                           "democracy_stock_lag"  = "Democracy Stock", 
                                           "e_gdppclog_lag"  = "GDP pc log", 
                                           "e_gdppc_growth_roll5_lag" = "GDP growth", 
                                           "e_poplog_lag" = "Population log", 
                                           "regionpol_EDI_lag" = "Regional democracy levels", 
                                           "v2x_jucon_lag" =  "Judicial constraints on executive", 
                                           "v2xlg_legcon_lag" = "Legislative constraints on executive", 
                                           "region_factor5" = "Western Europe and North America", 
                                           "region_factor4" = "Subsaharan Africa", 
                                           "region_factor6" = "Asia and Pacific", 
                                           "region_factor1" = "Eastern Europe and Central Asia", 
                                           "region_factor3" = "MENA", 
                                           "year_1900" = "Year", 
                                           "year_sq" = "Year squared", 
                                           "democracy_stock_lag:v2xca_academ_lag" = "AFI * Democracy Stock"))


# Plot marginal effects 

af_onset <-  sjPlot::plot_model(mod.1, type = "eff",
                                terms = c("v2xca_academ_lag[all]"),
                                vcov.fun = "vcovCL", 
                                vcov.type = "HC1",
                                vcov.args = "cluster = vdem_subset_v11_noNA$country_id",
                                alpha = 0.3) + 
  ylab("P(autocratization onset)") + xlab("Academic Freedom Index") + theme_bw() +  scale_y_continuous(limits = c(0,0.2), breaks = seq(0,0.5,0.1)) +
  scale_x_continuous(limits = c(0, 1), breaks = seq(0,1,0.2)) + ggtitle("") +
  geom_rug(data = vdem_subset_v11_noNA, aes(x = v2xca_academ_lag, y = 0), alpha = .1, position = "jitter", sides = "b")

af_onset


democracy_stock_onset <-  sjPlot::plot_model(mod.1, type = "eff",
                                             terms = c("democracy_stock_lag[all]"),
                                             vcov.fun = "vcovCL", 
                                             vcov.type = "HC1",
                                             vcov.args = "cluster = vdem_subset_v11_noNA$country_id",
                                             alpha = 0.3) + 
  ylab("P(autocratization onset)") + xlab("Democracy Stock") + theme_bw() +  scale_y_continuous(limits = c(0,0.5), breaks = seq(0,0.5,0.1)) +
  scale_x_continuous(limits = c(0, 0.7), breaks = seq(0,1,0.2)) + ggtitle("") +
  geom_rug(data = vdem_subset_v11_noNA, aes(x = democracy_stock_lag, y = 0), alpha = .1, position = "jitter", sides = "b")

democracy_stock_onset

af_edi_onset_1 <-  sjPlot::plot_model(mod.2, type = "eff",
                                      terms = c("democracy_stock_lag[all]", "v2xca_academ_lag[0.1, 0.5, 0.9]"),
                                      vcov.fun = "vcovCL", 
                                      vcov.type = "HC1",
                                      vcov.args = "cluster = vdem_subset_v11_noNA$country_id",
                                      alpha = 0.3) + 
  ylab("P(autocratization onset)") + xlab("Electoral Democracy Index") + theme_bw() +  scale_y_continuous(limits = c(0,1), breaks = seq(0,1,0.2)) +
  scale_x_continuous(limits = c(0, 1), breaks = seq(0,1,0.2)) + ggtitle("") +
  geom_rug(data = vdem_subset_v11_noNA, aes(x = democracy_stock_lag, y = 0), alpha = .1, position = "jitter", sides = "b")

af_edi_onset_1 <- data.frame(x = af_edi_onset_1$data$x,
                             academic_freedom = af_edi_onset_1$data$group,
                             predicted = af_edi_onset_1$data$predicted,
                             lower = af_edi_onset_1$data$conf.low,
                             upper = af_edi_onset_1$data$conf.high)

af_edi_onset_1_plot <-ggplot(data = af_edi_onset_1) +
  geom_ribbon(aes(x = x, y = predicted, fill = academic_freedom, color = academic_freedom, group = academic_freedom, ymin = lower, ymax = upper), alpha = 0.1) +
  geom_line(aes(x = x, y = predicted, color = academic_freedom, group = academic_freedom), size = 1.05) +
  ylab("P(autocratization onset)") + xlab("Democracy Stock") + theme_bw() + scale_y_continuous(limits = c(0,1), breaks = seq(0,1,0.2)) +
  labs(color = "Academic Freedom\nIndex", fill =  "Academic Freedom\nIndex") +
  scale_color_okabe_ito(order = c(6,3,5)) +
  scale_fill_okabe_ito(order = c(6,3,5))+
  scale_x_continuous(limits = c(0,1), breaks = seq(0,1,0.20)) +
  geom_rug(data = vdem_subset_v11_noNA, aes(x = democracy_stock_lag, y = 0), alpha = .1, position = "jitter", sides = "b")


af_edi_onset_2 <-  sjPlot::plot_model(mod.2, type = "eff",
                                      terms = c("v2xca_academ_lag[all]", "democracy_stock_lag[0.1, 0.4, 0.6]"),
                                      vcov.fun = "vcovCL", 
                                      vcov.type = "HC1",
                                      vcov.args = "cluster = vdem_subset_v12_noNA$country_id",
                                      alpha = 0.3) + 
  ylab("P(autocratization onset)") + xlab("Academic Freedom Index") + theme_bw() +  scale_y_continuous(limits = c(0,1), breaks = seq(0,1,0.2)) +
  scale_x_continuous(limits = c(0, 1), breaks = seq(0,1,0.2)) + ggtitle("") +
  geom_rug(data = vdem_subset_v11_noNA, aes(x = v2xca_academ_lag, y = 0), alpha = .1, position = "jitter", sides = "b")

af_edi_onset_2 <- data.frame(x = af_edi_onset_2$data$x,
                             democracy_stock_lag = af_edi_onset_2$data$group,
                             predicted = af_edi_onset_2$data$predicted,
                             lower = af_edi_onset_2$data$conf.low,
                             upper = af_edi_onset_2$data$conf.high)

af_edi_onset_2_plot <-ggplot(data = af_edi_onset_2) +
  geom_ribbon(aes(x = x, y = predicted, fill = democracy_stock_lag, color = democracy_stock_lag, group = democracy_stock_lag, ymin = lower, ymax = upper), alpha = 0.1) +
  geom_line(aes(x = x, y = predicted, color = democracy_stock_lag, group = democracy_stock_lag), size = 1.05) +
  ylab("P(autocratization onset)") + xlab("Academic Freedom Index") + theme_bw() + scale_y_continuous(limits = c(0,1), breaks = seq(0,1,0.2)) +
  labs(color = "Democracy Stock", fill =  "Democracy Stock") +
  scale_color_okabe_ito(order = c(6,3,5)) +
  scale_fill_okabe_ito(order = c(6,3,5))+
  scale_x_continuous(limits = c(0,1), breaks = seq(0,1,0.20)) +
  geom_rug(data = vdem_subset_v11_noNA, aes(x = v2xca_academ_lag, y = 0), alpha = .1, position = "jitter", sides = "b")

figure <- ggpubr::ggarrange(af_onset, democracy_stock_onset,af_edi_onset_1_plot, af_edi_onset_2_plot,
                            nrow = 2, ncol = 2, legend = "bottom", 
                            labels = c("A","B", "C", "D"))

ggsave("figures/Figure_C5_Appendix.pdf", figure, width = 28, height = 30, units = "cm", dpi = 900, device = "pdf")




#---------------------------------------------------#
#------------- Split Sample Strategy  --------------#
#---------------------------------------------------#

#load V-Dem full dataset
#load(here("data_out/vdem_full_v11.Rda"))

# set you working directory here #

#setwd("P:/PIPM/7. VWS Projekt Academic Freedom Index/replication_files_autocratization_and_academic_freedom")

# load country_aggregated data

vdem_full_v11 <- readRDS("data/vdem11/Country_Year_V-Dem_Full+others_R_v11/V-Dem-CY-Full+Others-v11.rds")
vdem_full_v12 <- readRDS("data/vdem12/Country_Year_V-Dem_Full+others_R_v12/V-Dem-CY-Full+Others-v12.rds")
academic_freedom_four_data <-  readRDS("data/academic_freedom_index_cy.rds")

#---------------------------------------------------#
#--------------------subset data--------------------#
#---------------------------------------------------#

vdem_subset_v11 <- vdem_full_v11 %>%
  dplyr::select(country_id, year, COWcode, country_name, country_text_id, v2x_polyarchy, v2x_libdem,
                v2caviol, v2caassemb, v2cagenmob, v2caconmob, v2cademmob, v2caautmob, v2cauni, 
                v2canuni, v2caprotac, v2cafres, v2cafexch, v2cainsaut, v2casurv, v2clacfree, e_regionpol_6C,
                v2xca_academ, e_pt_coup, e_regionpol, v2x_regime, v2xlg_legcon, v2x_jucon)

vdem_full_v12 <- vdem_full_v12 %>%
  dplyr::select(country_id, year, e_gdppc, e_pop)

vdem_subset_v11 <- vdem_subset_v11 %>%
  left_join(vdem_full_v12, by = c("country_id", "year"))

vdem_subset_v11 <- vdem_subset_v11 %>%
  left_join(academic_freedom_four_data, by = c("country_text_id", "year")) %>%
  mutate(diff_v2xca_academ = v2xca_academ_four - v2xca_academ )

summary(vdem_subset_v11$diff_v2xca_academ)

vdem_subset_v11 %>% 
  filter(diff_v2xca_academ >= 0.1 | diff_v2xca_academ <= -0.1) %>%
  dplyr::select(country_name, year, v2xca_academ, v2xca_academ_four,diff_v2xca_academ)

## Rename Variables ##

vdem_subset_v11 <- vdem_subset_v11 %>%
  rename(v2xca_academ_five = v2xca_academ, 
         v2xca_academ = v2xca_academ_four)

#---------------------------------------------------#
#--------------------Load ERT data------------------#
#---------------------------------------------------#

ert_data <- read.csv("data/ERT-3.1/inst/ERT.csv")

ert_data <- ert_data %>%
  group_by(dem_ep_id) %>%
  mutate(last_v2x_polyarchy = last(v2x_polyarchy), 
         first_v2x_polyarchy = first(v2x_polyarchy)) %>%
  ungroup() %>%
  mutate(v2x_polyarchy_increase = last_v2x_polyarchy - first_v2x_polyarchy)


ert_data <- ert_data %>%
  group_by(aut_ep_id) %>%
  mutate(last_v2x_polyarchy = last(v2x_polyarchy), 
         first_v2x_polyarchy = first(v2x_polyarchy)) %>%
  ungroup() %>%
  mutate(v2x_polyarchy_decrease = last_v2x_polyarchy - first_v2x_polyarchy) %>%
  dplyr::select(-c(last_v2x_polyarchy, first_v2x_polyarchy))

ert_data <- ert_data %>%
  mutate(v2x_polyarchy_increase = ifelse(is.na(dem_ep_id), NA,v2x_polyarchy_increase )) %>%
  mutate(v2x_polyarchy_decrease = ifelse(is.na(aut_ep_id), NA,v2x_polyarchy_decrease ))

ert_data <- ert_data %>%
  dplyr::select(-c(country_text_id, country_name, v2x_regime:v2x_polyarchy_codehigh))

vdem_subset_v11 <- vdem_subset_v11 %>%
  left_join(ert_data, by =c("country_id", "year"))


vdem_subset_v11 <- vdem_subset_v11 %>%
  group_by(e_regionpol, year) %>%
  mutate(regionpol_EDI = mean(v2x_polyarchy, na.rm = TRUE)) %>%
  ungroup() %>%
  group_by(country_id) %>%
  mutate(e_gdppc_growth = ((e_gdppc - dplyr::lag(e_gdppc))/dplyr::lag(e_gdppc))*100) %>%
  mutate(e_gdppc_growth_roll5 = rollmean(e_gdppc_growth, k = 5, fill = NA)) %>%
  ungroup() %>%
  group_by(aut_ep_id) %>%
  mutate(aut_duration =  year- first(year), 
         aut_duration = ifelse(is.na(aut_ep_id), 0, aut_duration)) %>%
  group_by(country_id) %>%
  mutate(group_id = cumsum(aut_ep != lag(aut_ep, default = FALSE))) %>% # group_id for each autocra_period
  group_by(country_id, group_id) %>%
  mutate(spell = year-first(year), 
         spell = ifelse(is.na(aut_ep_id), spell, 0))


#---------------------------------------------------#
#------------Democracy Stock Variable---------------#
#---------------------------------------------------#


decay_sum <- function(tm, vl, decay) {
  last_time <- 0
  current_sum <- 0
  sums <- numeric(length(vl))
  ldecay <- (1-decay)
  for (i in 1:length(vl)) {
    delta <- as.numeric(tm[i] - last_time)
    current_sum <- current_sum * (ldecay * delta) + vl[i]
    last_time <- tm[i]
    sums[i] <- current_sum
  }
  sums
}

annual_depricition_function  <- function(year, var, decay) {
  last_time <- first(year)
  current_sum <- 0
  sums <- numeric(length(var))
  ldecay <- (1-decay)
  for (i in 1:length(var)) {
    delta <- as.numeric(year[i] - last_time)
    current_sum <- ldecay * (current_sum + decay * var[i])
    last_time <- year[i]
    sums[i] <- current_sum
  }
  sums
}


## Functions ##

na_interpolation2 <- function(x, option = "linear", maxgap) {
  library(imputeTS)
  library(dplyr)
  
  total_not_missing <- sum(!is.na(x))
  
  # check there is sufficient data for na_interpolation 
  if(total_not_missing < 2) {x} 
  
  else
    
    # replace takes an input vector, a T/F vector & replacement value
  {replace(
    # input vector is interpolated data
    # this will impute leading/lagging NAs which we don't want 
    imputeTS::na_interpolation(x, option = option, maxgap = maxgap), 
    
    # create T/F vector for NAs,  
    is.na(na.approx(x, na.rm = FALSE)), 
    
    # replace TRUE with NA in input vector  
    NA) 
  }
}


vdem_subset_v11 <- vdem_subset_v11 %>%
  group_by(country_id) %>%
  mutate(v2x_polyarchy_interpolated = na_interpolation2(v2x_polyarchy, option = "linear", maxgap = 5)) %>%
  drop_na(v2x_polyarchy_interpolated) %>%
  mutate(democracy_stock = annual_depricition_function(year, v2x_polyarchy_interpolated, 0.01)) %>%
  dplyr::select(country_id, country_name, year, v2x_polyarchy, democracy_stock, everything())

summary(vdem_subset_v11$democracy_stock)

ggplot(subset(vdem_subset_v11, country_name == "Brazil"), aes(x = year)) +
  geom_line(aes(y = v2x_polyarchy), color = "red") +
  geom_line(aes(y = democracy_stock), color = "blue") +
  ylim(0,1) +
  theme_clean()


#---------------------------------------------------#
#-----------------Statistical Analysis--------------#
#---------------------------------------------------#

## lags for independent variables ##

vdem_subset_v11_noNA <- vdem_subset_v11 %>%
  mutate(e_gdppclog = log(e_gdppc+1), 
         e_poplog = log(e_pop +1), 
         year_1900 = year -1900) %>%
  group_by(country_id) %>%
  mutate(v2xca_academ_lag = lag(v2xca_academ), 
         v2cademmob_lag = lag(v2cademmob), 
         e_gdppclog_lag = lag(e_gdppclog), 
         e_gdppc_growth_roll5_lag = lag(e_gdppc_growth_roll5), 
         regionpol_EDI_lag = lag(regionpol_EDI), 
         e_poplog_lag = lag(e_poplog), 
         v2x_polyarchy_lag = lag(v2x_polyarchy), 
         v2x_polyarchy_lag_squared = v2x_polyarchy_lag^2, 
         v2x_jucon_lag = lag(v2x_jucon), 
         v2xlg_legcon_lag = lag(v2xlg_legcon), 
         democracy_stock_lag = lag(democracy_stock))

vdem_subset_v11_noNA <- vdem_subset_v11_noNA %>%
  mutate(region_factor = as.factor(e_regionpol_6C),
         region_factor = relevel(region_factor, ref=1), 
         spell = spell/100,
         spell_sq = spell^2/100,
         spell_cb = spell^3/100,
         year_sq = year_1900^2, 
         v2xca_academ_lag_sq = v2xca_academ_lag^2)


## drop NA ## 
vdem_subset_v11_noNA <- vdem_subset_v11_noNA %>%
  ungroup() %>%
  filter(year >=1900) %>%
  dplyr::select(-aut_ep_id) %>%
  mutate(aut_ep_stage2 = ifelse(aut_ep_outcome %in% c(1,5), 1, 0)) %>%
  drop_na(v2xca_academ_lag, e_gdppclog, e_gdppc_growth_roll5_lag, regionpol_EDI_lag, 
          e_poplog,region_factor ,v2x_jucon_lag, v2xlg_legcon_lag, year_1900, spell, country_id, democracy_stock_lag) 

summary(vdem_subset_v11_noNA)


#### Building Split Samples ####

vdem_subset_v11_noNA_1900_1989 <- vdem_subset_v11_noNA %>%
  filter(year <1990)

vdem_subset_v11_noNA_1990_2020 <- vdem_subset_v11_noNA %>%
  filter(year>=1990)


## setting ongoing events to NA as recommend by McGrath (2015)

vdem_subset_v11_noNA_1900_1989 <- vdem_subset_v11_noNA_1900_1989 %>%
  group_by(country_id) %>%
  mutate(aut_ep_onset = ifelse(aut_ep==1 & lag(aut_ep)== 1, NA, aut_ep)) %>%
  dplyr::select(country_id, year, aut_ep, aut_ep_onset, everything()) %>%
  drop_na(aut_ep_onset)

summary(vdem_subset_v11_noNA_1900_1989$aut_ep_onset) # 1.7 % of all country-years
table(vdem_subset_v11_noNA_1900_1989$aut_ep_onset) # 114 autocratization onsets 


vdem_subset_v11_noNA_1990_2020 <- vdem_subset_v11_noNA_1990_2020 %>%
  group_by(country_id) %>%
  mutate(aut_ep_onset = ifelse(aut_ep==1 & lag(aut_ep)== 1, NA, aut_ep)) %>%
  dplyr::select(country_id, year, aut_ep, aut_ep_onset, everything()) %>%
  drop_na(aut_ep_onset)

summary(vdem_subset_v11_noNA_1990_2020$aut_ep_onset) # 1,77 % of all country-years
table(vdem_subset_v11_noNA_1990_2020$aut_ep_onset) # 74 autocratization onsets 


# list of all onsets #

list_of_auto_onsets <- vdem_subset_v11_noNA_1990_2020 %>%
  filter(aut_ep_onset == 1) %>%
  ungroup() %>%
  dplyr::select(country_name, year)

list_of_auto_onsets$year <- as.factor(list_of_auto_onsets$year)

#---------------------------------------------------#
#------------------------ Models -------------------#
#---------------------------------------------------#

##### 1900-1989 ####

# Run model 1,
mod.1a <- brglm::brglm(as.factor(aut_ep_onset) ~  poly(v2xca_academ_lag, degree = 2) + democracy_stock_lag +
                        e_gdppclog_lag + e_gdppc_growth_roll5_lag +  e_poplog_lag +
                        regionpol_EDI_lag + v2x_jucon_lag + v2xlg_legcon_lag +
                        region_factor +
                        year_1900 + year_sq, 
                      data = vdem_subset_v11_noNA_1900_1989, family = binomial(link = "probit"),
                      method = "brglm.fit", control.brglm = brglm.control(br.maxit = 1000, br.trace = T),
                      pl=T)
summary(mod.1a)


#Clustered SEs
ses_mod.1a <-  coeftest(mod.1a, vcov = vcovCL(mod.1a, cluster = vdem_subset_v11_noNA_1900_1989[, "country_id"]),
                       type = "HC1", fix = T, cadjust = T)
ses_mod.1a

# Run model 1,
mod.2a <- brglm::brglm(as.factor(aut_ep_onset) ~  poly(v2xca_academ_lag, degree = 2) + democracy_stock_lag +
                        v2xca_academ_lag:democracy_stock_lag + e_gdppclog_lag +
                        e_gdppc_growth_roll5_lag +  e_poplog_lag +
                        regionpol_EDI_lag + v2x_jucon_lag + v2xlg_legcon_lag +
                        region_factor +
                        year_1900 + year_sq, 
                      data = vdem_subset_v11_noNA_1900_1989, family = binomial(link = "probit"),
                      method = "brglm.fit", control.brglm = brglm.control(br.maxit = 1000, br.trace = T),
                      pl=T)
summary(mod.2a)

#Clustered SEs
ses_mod.2a <-  coeftest(mod.2a, vcov = vcovCL(mod.2a, cluster = vdem_subset_v11_noNA_1900_1989[, "country_id"]),
                       type = "HC1", fix = T, cadjust = T)
ses_mod.2a

## standard errors and pvalues for 

standard.errors <- list(ses_mod.1a[,2], ses_mod.2a[,2])
p.values <- list(ses_mod.1a[,4], ses_mod.2a[,4])


modelsummary::modelsummary(list(mod.1a, mod.2a),
                           output = "results/TableC10.tex", 
                           fmt = 3,
                           vcov = "stata", cluster = "country_id", 
                           estimate  = c("{estimate}{stars}"),
                           statistic = 'conf.int', 
                           conf_level = .95, 
                           coef_rename = c("(Intercept)" = "Intercept", 
                                           "poly(v2xca_academ_lag, degree = 2)1" = "Academic Freedom", 
                                           "poly(v2xca_academ_lag, degree = 2)2" = "Academic Freedom ^2", 
                                           "democracy_stock_lag"  = "Democracy Stock", 
                                           "e_gdppclog_lag"  = "GDP pc log", 
                                           "e_gdppc_growth_roll5_lag" = "GDP growth", 
                                           "e_poplog_lag" = "Population log", 
                                           "regionpol_EDI_lag" = "Regional democracy levels", 
                                           "v2x_jucon_lag" =  "Judicial constraints on executive", 
                                           "v2xlg_legcon_lag" = "Legislative constraints on executive", 
                                           "region_factor5" = "Western Europe and North America", 
                                           "region_factor4" = "Subsaharan Africa", 
                                           "region_factor6" = "Asia and Pacific", 
                                           "region_factor1" = "Eastern Europe and Central Asia", 
                                           "region_factor3" = "MENA", 
                                           "year_1900" = "Year", 
                                           "year_sq" = "Year squared", 
                                           "democracy_stock_lag:v2xca_academ_lag" = "AFI * Democracy Stock"))


### 1990-2020 ####

# Run model 1,
mod.1b <- brglm::brglm(as.factor(aut_ep_onset) ~  poly(v2xca_academ_lag, degree = 2) + democracy_stock_lag +
                         e_gdppclog_lag + e_gdppc_growth_roll5_lag +  e_poplog_lag +
                         regionpol_EDI_lag + v2x_jucon_lag + v2xlg_legcon_lag +
                         region_factor +
                         year_1900 + year_sq, 
                       data = vdem_subset_v11_noNA_1990_2020, family = binomial(link = "probit"),
                       method = "brglm.fit", control.brglm = brglm.control(br.maxit = 1000, br.trace = T),
                       pl=T)
summary(mod.1b)


#Clustered SEs
ses_mod.1b <-  coeftest(mod.1b, vcov = vcovCL(mod.1b, cluster = vdem_subset_v11_noNA_1990_2020[, "country_id"]),
                        type = "HC1", fix = T, cadjust = T)
ses_mod.1b

# Run model 1,
mod.2b <- brglm::brglm(as.factor(aut_ep_onset) ~  poly(v2xca_academ_lag, degree = 2) + democracy_stock_lag +
                         v2xca_academ_lag:democracy_stock_lag + e_gdppclog_lag +
                         e_gdppc_growth_roll5_lag +  e_poplog_lag +
                         regionpol_EDI_lag + v2x_jucon_lag + v2xlg_legcon_lag +
                         region_factor +
                         year_1900 + year_sq, 
                       data = vdem_subset_v11_noNA_1990_2020, family = binomial(link = "probit"),
                       method = "brglm.fit", control.brglm = brglm.control(br.maxit = 1000, br.trace = T),
                       pl=T)
summary(mod.2b)

#Clustered SEs
ses_mod.2b <-  coeftest(mod.2b, vcov = vcovCL(mod.2b, cluster = vdem_subset_v11_noNA_1990_2020[, "country_id"]),
                        type = "HC1", fix = T, cadjust = T)
ses_mod.2b

## standard errors and pvalues for 

standard.errors <- list(ses_mod.1b[,2], ses_mod.2b[,2])
p.values <- list(ses_mod.1b[,4], ses_mod.2b[,4])


modelsummary::modelsummary(list(mod.1b, mod.2b),
                           output = "results/TableC11.tex", 
                           fmt = 3,
                           vcov = "stata", cluster = "country_id", 
                           estimate  = c("{estimate}{stars}"),
                           statistic = 'conf.int', 
                           conf_level = .95, 
                           coef_rename = c("(Intercept)" = "Intercept", 
                                           "poly(v2xca_academ_lag, degree = 2)1" = "Academic Freedom", 
                                           "poly(v2xca_academ_lag, degree = 2)2" = "Academic Freedom ^2", 
                                           "democracy_stock_lag"  = "Democracy Stock", 
                                           "e_gdppclog_lag"  = "GDP pc log", 
                                           "e_gdppc_growth_roll5_lag" = "GDP growth", 
                                           "e_poplog_lag" = "Population log", 
                                           "regionpol_EDI_lag" = "Regional democracy levels", 
                                           "v2x_jucon_lag" =  "Judicial constraints on executive", 
                                           "v2xlg_legcon_lag" = "Legislative constraints on executive", 
                                           "region_factor5" = "Western Europe and North America", 
                                           "region_factor4" = "Subsaharan Africa", 
                                           "region_factor6" = "Asia and Pacific", 
                                           "region_factor1" = "Eastern Europe and Central Asia", 
                                           "region_factor3" = "MENA", 
                                           "year_1900" = "Year", 
                                           "year_sq" = "Year squared", 
                                           "democracy_stock_lag:v2xca_academ_lag" = "AFI * Democracy Stock"))


#### Plot marginal effects #### 

#### 1900-1989 ####

af_onset_a <-  sjPlot::plot_model(mod.1a, type = "eff",
                                terms = c("v2xca_academ_lag[all]"),
                                vcov.fun = "vcovCL", 
                                vcov.type = "HC1",
                                vcov.args = "cluster = vdem_subset_v11_noNA_1900_1989$country_id",
                                alpha = 0.3) + 
  ylab("P(autocratization onset)") + xlab("Academic Freedom Index") + theme_bw() +  scale_y_continuous(limits = c(0,0.2), breaks = seq(0,0.5,0.1)) +
  scale_x_continuous(limits = c(0, 1), breaks = seq(0,1,0.2)) + ggtitle("") +
  geom_rug(data = vdem_subset_v11_noNA_1900_1989, aes(x = v2xca_academ_lag, y = 0), alpha = .1, position = "jitter", sides = "b")


democracy_stock_onset_a <-  sjPlot::plot_model(mod.1a, type = "eff",
                                             terms = c("democracy_stock_lag[all]"),
                                             vcov.fun = "vcovCL", 
                                             vcov.type = "HC1",
                                             vcov.args = "cluster = vdem_subset_v11_noNA_1900_1989$country_id",
                                             alpha = 0.3) + 
  ylab("P(autocratization onset)") + xlab("Democracy Stock") + theme_bw() +  scale_y_continuous(limits = c(0,0.5), breaks = seq(0,0.5,0.1)) +
  scale_x_continuous(limits = c(0, 0.7), breaks = seq(0,1,0.2)) + ggtitle("") +
  geom_rug(data = vdem_subset_v11_noNA_1900_1989, aes(x = democracy_stock_lag, y = 0), alpha = .1, position = "jitter", sides = "b")

af_edi_onset_1a <-  sjPlot::plot_model(mod.2a, type = "eff",
                                      terms = c("democracy_stock_lag[all]", "v2xca_academ_lag[0.1, 0.5, 0.9]"),
                                      vcov.fun = "vcovCL", 
                                      vcov.type = "HC1",
                                      vcov.args = "cluster = vdem_subset_v11_noNA_1900_1989$country_id",
                                      alpha = 0.3) + 
  ylab("P(autocratization onset)") + xlab("Electoral Democracy Index") + theme_bw() +  scale_y_continuous(limits = c(0,1), breaks = seq(0,1,0.2)) +
  scale_x_continuous(limits = c(0, 1), breaks = seq(0,1,0.2)) + ggtitle("") +
  geom_rug(data = vdem_subset_v11_noNA_1900_1989, aes(x = democracy_stock_lag, y = 0), alpha = .1, position = "jitter", sides = "b")

af_edi_onset_1a <- data.frame(x = af_edi_onset_1a$data$x,
                             academic_freedom = af_edi_onset_1a$data$group,
                             predicted = af_edi_onset_1a$data$predicted,
                             lower = af_edi_onset_1a$data$conf.low,
                             upper = af_edi_onset_1a$data$conf.high)

af_edi_onset_1a_plot <-ggplot(data = af_edi_onset_1a) +
  geom_ribbon(aes(x = x, y = predicted, fill = academic_freedom, color = academic_freedom, group = academic_freedom, ymin = lower, ymax = upper), alpha = 0.1) +
  geom_line(aes(x = x, y = predicted, color = academic_freedom, group = academic_freedom), size = 1.05) +
  ylab("P(autocratization onset)") + xlab("Democracy Stock") + theme_bw() + scale_y_continuous(limits = c(0,1), breaks = seq(0,1,0.2)) +
  labs(color = "Academic Freedom Index", fill =  "Academic Freedom Index") +
  scale_color_okabe_ito(order = c(6,3,5)) +
  scale_fill_okabe_ito(order = c(6,3,5))+
  scale_x_continuous(limits = c(0,1), breaks = seq(0,1,0.20)) +
  geom_rug(data = vdem_subset_v11_noNA_1900_1989, aes(x = democracy_stock_lag, y = 0), alpha = .1, position = "jitter", sides = "b")


af_edi_onset_2a <-  sjPlot::plot_model(mod.2a, type = "eff",
                                      terms = c("v2xca_academ_lag[all]", "democracy_stock_lag[0.1, 0.4, 0.6]"),
                                      vcov.fun = "vcovCL", 
                                      vcov.type = "HC1",
                                      vcov.args = "cluster = vdem_subset_v11_noNA_1900_1989$country_id",
                                      alpha = 0.3) + 
  ylab("P(autocratization onset)") + xlab("Academic Freedom Index") + theme_bw() +  scale_y_continuous(limits = c(0,1), breaks = seq(0,1,0.2)) +
  scale_x_continuous(limits = c(0, 1), breaks = seq(0,1,0.2)) + ggtitle("") +
  geom_rug(data = vdem_subset_v11_noNA, aes(x = v2xca_academ_lag, y = 0), alpha = .1, position = "jitter", sides = "b")

af_edi_onset_2a <- data.frame(x = af_edi_onset_2a$data$x,
                             democracy_stock_lag = af_edi_onset_2a$data$group,
                             predicted = af_edi_onset_2a$data$predicted,
                             lower = af_edi_onset_2a$data$conf.low,
                             upper = af_edi_onset_2a$data$conf.high)

af_edi_onset_2a_plot <-ggplot(data = af_edi_onset_2a) +
  geom_ribbon(aes(x = x, y = predicted, fill = democracy_stock_lag, color = democracy_stock_lag, group = democracy_stock_lag, ymin = lower, ymax = upper), alpha = 0.1) +
  geom_line(aes(x = x, y = predicted, color = democracy_stock_lag, group = democracy_stock_lag), size = 1.05) +
  ylab("P(autocratization onset)") + xlab("Academic Freedom Index") + theme_bw() + scale_y_continuous(limits = c(0,1), breaks = seq(0,1,0.2)) +
  labs(color = "Democracy Stock", fill =  "Democracy Stock") +
  scale_color_okabe_ito(order = c(6,3,5)) +
  scale_fill_okabe_ito(order = c(6,3,5))+
  scale_x_continuous(limits = c(0,1), breaks = seq(0,1,0.20)) +
  geom_rug(data = vdem_subset_v11_noNA_1900_1989, aes(x = v2xca_academ_lag, y = 0), alpha = .1, position = "jitter", sides = "b")

figure <- ggpubr::ggarrange(af_onset_a, democracy_stock_onset_a,af_edi_onset_1a_plot, af_edi_onset_2a_plot,
                            nrow = 2, ncol = 2, legend = "bottom", 
                            labels = c("A","B", "C", "D"))

figure <-annotate_figure(figure, top = text_grob("1900-1989", 
                                                 color = "black", face = "bold", size = 14))

ggsave("figures/Figure_C7_Appendix.pdf", figure, width = 28, height = 30, units = "cm", dpi = 900, device = "pdf")

#### 1990-2020 ####

af_onset_b <-  sjPlot::plot_model(mod.1b, type = "eff",
                                  terms = c("v2xca_academ_lag[all]"),
                                  vcov.fun = "vcovCL", 
                                  vcov.type = "HC1",
                                  vcov.args = "cluster = vdem_subset_v11_noNA_1990_2020$country_id",
                                  alpha = 0.3) + 
  ylab("P(autocratization onset)") + xlab("Academic Freedom Index") + theme_bw() +  scale_y_continuous(limits = c(0,0.2), breaks = seq(0,0.5,0.1)) +
  scale_x_continuous(limits = c(0, 1), breaks = seq(0,1,0.2)) + ggtitle("") +
  geom_rug(data = vdem_subset_v11_noNA_1990_2020, aes(x = v2xca_academ_lag, y = 0), alpha = .1, position = "jitter", sides = "b")

democracy_stock_onset_b <-  sjPlot::plot_model(mod.1b, type = "eff",
                                               terms = c("democracy_stock_lag[all]"),
                                               vcov.fun = "vcovCL", 
                                               vcov.type = "HC1",
                                               vcov.args = "cluster = vdem_subset_v11_noNA_1990_2020$country_id",
                                               alpha = 0.3) + 
  ylab("P(autocratization onset)") + xlab("Democracy Stock") + theme_bw() +  scale_y_continuous(limits = c(0,0.5), breaks = seq(0,0.5,0.1)) +
  scale_x_continuous(limits = c(0, 0.7), breaks = seq(0,1,0.2)) + ggtitle("") +
  geom_rug(data = vdem_subset_v11_noNA_1990_2020, aes(x = democracy_stock_lag, y = 0), alpha = .1, position = "jitter", sides = "b")


af_edi_onset_1b <-  sjPlot::plot_model(mod.2b, type = "eff",
                                       terms = c("democracy_stock_lag[all]", "v2xca_academ_lag[0.1, 0.5, 0.9]"),
                                       vcov.fun = "vcovCL", 
                                       vcov.type = "HC1",
                                       vcov.args = "cluster = vdem_subset_v11_noNA_1990_2020$country_id",
                                       alpha = 0.3) + 
  ylab("P(autocratization onset)") + xlab("Electoral Democracy Index") + theme_bw() +  scale_y_continuous(limits = c(0,1), breaks = seq(0,1,0.2)) +
  scale_x_continuous(limits = c(0, 1), breaks = seq(0,1,0.2)) + ggtitle("") +
  geom_rug(data = vdem_subset_v11_noNA_1990_2020, aes(x = democracy_stock_lag, y = 0), alpha = .1, position = "jitter", sides = "b")

af_edi_onset_1b <- data.frame(x = af_edi_onset_1b$data$x,
                              academic_freedom = af_edi_onset_1b$data$group,
                              predicted = af_edi_onset_1b$data$predicted,
                              lower = af_edi_onset_1b$data$conf.low,
                              upper = af_edi_onset_1b$data$conf.high)

af_edi_onset_1b_plot <-ggplot(data = af_edi_onset_1b) +
  geom_ribbon(aes(x = x, y = predicted, fill = academic_freedom, color = academic_freedom, group = academic_freedom, ymin = lower, ymax = upper), alpha = 0.1) +
  geom_line(aes(x = x, y = predicted, color = academic_freedom, group = academic_freedom), size = 1.05) +
  ylab("P(autocratization onset)") + xlab("Democracy Stock") + theme_bw() + scale_y_continuous(limits = c(0,1), breaks = seq(0,1,0.2)) +
  labs(color = "Academic Freedom Index", fill =  "Academic Freedom Index") +
  scale_color_okabe_ito(order = c(6,3,5)) +
  scale_fill_okabe_ito(order = c(6,3,5))+
  scale_x_continuous(limits = c(0,1), breaks = seq(0,1,0.20)) +
  geom_rug(data = vdem_subset_v11_noNA_1990_2020, aes(x = democracy_stock_lag, y = 0), alpha = .1, position = "jitter", sides = "b")


af_edi_onset_2b <-  sjPlot::plot_model(mod.2b, type = "eff",
                                       terms = c("v2xca_academ_lag[all]", "democracy_stock_lag[0.1, 0.4, 0.6]"),
                                       vcov.fun = "vcovCL", 
                                       vcov.type = "HC1",
                                       vcov.args = "cluster = vdem_subset_v11_noNA_1990_2020$country_id",
                                       alpha = 0.3) + 
  ylab("P(autocratization onset)") + xlab("Academic Freedom Index") + theme_bw() +  scale_y_continuous(limits = c(0,1), breaks = seq(0,1,0.2)) +
  scale_x_continuous(limits = c(0, 1), breaks = seq(0,1,0.2)) + ggtitle("") +
  geom_rug(data = vdem_subset_v11_noNA_1990_2020, aes(x = v2xca_academ_lag, y = 0), alpha = .1, position = "jitter", sides = "b")

af_edi_onset_2b <- data.frame(x = af_edi_onset_2b$data$x,
                              democracy_stock_lag = af_edi_onset_2b$data$group,
                              predicted = af_edi_onset_2b$data$predicted,
                              lower = af_edi_onset_2b$data$conf.low,
                              upper = af_edi_onset_2b$data$conf.high)

af_edi_onset_2b_plot <-ggplot(data = af_edi_onset_2b) +
  geom_ribbon(aes(x = x, y = predicted, fill = democracy_stock_lag, color = democracy_stock_lag, group = democracy_stock_lag, ymin = lower, ymax = upper), alpha = 0.1) +
  geom_line(aes(x = x, y = predicted, color = democracy_stock_lag, group = democracy_stock_lag), size = 1.05) +
  ylab("P(autocratization onset)") + xlab("Academic Freedom Index") + theme_bw() + scale_y_continuous(limits = c(0,1), breaks = seq(0,1,0.2)) +
  labs(color = "Democracy Stock", fill =  "Democracy Stock") +
  scale_color_okabe_ito(order = c(6,3,5)) +
  scale_fill_okabe_ito(order = c(6,3,5))+
  scale_x_continuous(limits = c(0,1), breaks = seq(0,1,0.20)) + ggtitle("") +
  geom_rug(data = vdem_subset_v11_noNA_1990_2020, aes(x = v2xca_academ_lag, y = 0), alpha = .1, position = "jitter", sides = "b")

figure <- ggpubr::ggarrange(af_onset_b, democracy_stock_onset_b,af_edi_onset_1b_plot, af_edi_onset_2b_plot,
                            nrow = 2, ncol = 2, legend = "bottom", 
                            labels = c("A","B", "C", "D"))

figure <-annotate_figure(figure, top = text_grob("1990-2020", 
                                      color = "black", face = "bold", size = 14))

ggsave("figures/Figure_C8_Appendix.pdf", figure, width = 28, height = 30, units = "cm", dpi = 900, device = "pdf")

